default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64

EXTERN  OPENSSL_ia32cap_P

global  sha1_block_data_order

ALIGN   16
;-----------------------------------------------------------------------
; sha1_block_data_order -- public entry point and implementation dispatcher.
;
; ABI:  entered with the Win64 argument registers rcx, rdx, r8.
; In:   rcx = pointer to five 32-bit state words (loaded at $L$prologue)
;       rdx = pointer to the input bytes (read 64 bytes at a time in $L$loop)
;       r8  = third argument, only copied along in the visible code
;             (presumably the number of 64-byte blocks -- TODO confirm
;             against the loop tail, which is outside this view)
;
; The arguments are re-homed into rdi/rsi/rdx so that the code that follows
; can use the System V register assignment. rdi and rsi are callee-saved on
; Win64, so their incoming values are first parked in the caller-provided
; shadow space at [rsp+8] and [rsp+16]; restoring them is left to whichever
; path is taken (not visible here).
;
; Dispatch, driven by three dwords of OPENSSL_ia32cap_P (bit meanings below
; are inferred from the branch-target names -- confirm against the
; OPENSSL_ia32cap documentation):
;   dword[1] bit 9 clear                    -> $L$ialu  (general-purpose regs)
;   dword[2] bit 29 set                     -> _shaext_shortcut
;   dword[2] bits 3, 5 and 8 all set        -> _avx2_shortcut
;   dword[1] bit 28 and dword[0] bit 30 set -> _avx_shortcut
;   otherwise                               -> _ssse3_shortcut
;
; Clobbers on every path out of this stub: rax, rdi, rsi, rdx, r8d, r9d,
; r10d, flags.
;-----------------------------------------------------------------------
sha1_block_data_order:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi       ; second shadow-space slot keeps caller's rsi
        mov     rax,rsp                 ; rax = rsp at entry; $L$ialu reloads it itself,
                                        ; so presumably this copy is for the *_shortcut paths
; Label below is not referenced in the visible code; presumably it marks the
; start of the unwind-covered range for the SEH tables elsewhere in the file.
$L$SEH_begin_sha1_block_data_order:
        mov     rdi,rcx                 ; arg 1 -> rdi
        mov     rsi,rdx                 ; arg 2 -> rsi
        mov     rdx,r8                  ; arg 3 -> rdx (r8 is free to reuse from here on)



        mov     r9d,DWORD[((OPENSSL_ia32cap_P+0))]      ; capability dword 0
        mov     r8d,DWORD[((OPENSSL_ia32cap_P+4))]      ; capability dword 1
        mov     r10d,DWORD[((OPENSSL_ia32cap_P+8))]     ; capability dword 2
        test    r8d,512                 ; 512 = 1<<9
        jz      NEAR $L$ialu            ; bit clear: none of the shortcut paths is considered
        test    r10d,536870912          ; 536870912 = 1<<29
        jnz     NEAR _shaext_shortcut
        and     r10d,296                ; 296 = (1<<3)|(1<<5)|(1<<8); keep only those bits
        cmp     r10d,296                ; equal only if all three are set
        je      NEAR _avx2_shortcut
        and     r8d,268435456           ; 268435456 = 1<<28, isolate bit 28 of dword 1
        and     r9d,1073741824          ; 1073741824 = 1<<30, isolate bit 30 of dword 0
        or      r8d,r9d                 ; combine the two isolated bits
        cmp     r8d,1342177280          ; 1342177280 = (1<<30)|(1<<28): both must be set
        je      NEAR _avx_shortcut
        jmp     NEAR _ssse3_shortcut    ; fallback once dword[1] bit 9 is known to be set

ALIGN   16
;-----------------------------------------------------------------------
; $L$ialu -- frame set-up for the path that uses only general-purpose
; registers in the visible code.
;
; In:   rdi = pointer to the five 32-bit state words
;       rsi = pointer to the input bytes
;       rdx = third argument (presumably block count -- TODO confirm)
;
; On reaching $L$loop:
;       r8  = state pointer, r9 = input pointer, r10 = third argument
;       esi, edi, r11d, r12d, r13d = state words 0..4 (working variables)
;       rsp is 64-byte aligned; [rsp .. rsp+63] is the 16-dword buffer the
;       rounds use for message-schedule words; [rsp+64] holds the value rsp
;       had on entry to this block (before the pushes).
;
; rbx, rbp, r12, r13 and r14 are pushed because the rounds below overwrite
; them; the matching restore is outside this view.
;-----------------------------------------------------------------------
$L$ialu:
        mov     rax,rsp                 ; remember rsp before any push

        push    rbx

        push    rbp

        push    r12

        push    r13

        push    r14

        mov     r8,rdi                  ; r8 = state pointer (edi/esi become working variables)
        sub     rsp,72                  ; 64-byte schedule buffer + 8 bytes for the saved rsp
        mov     r9,rsi                  ; r9 = input pointer
        and     rsp,-64                 ; round rsp down to a 64-byte boundary
        mov     r10,rdx                 ; r10 = third argument (edx is reused as scratch in the rounds)
        mov     QWORD[64+rsp],rax       ; stash the pre-push rsp just above the buffer

; Not referenced in the visible code; presumably marks the end of the
; prologue for the unwind handler -- confirm against the SEH data.
$L$prologue:

        mov     esi,DWORD[r8]           ; state word 0
        mov     edi,DWORD[4+r8]         ; state word 1
        mov     r11d,DWORD[8+r8]        ; state word 2
        mov     r12d,DWORD[12+r8]       ; state word 3
        mov     r13d,DWORD[16+r8]       ; state word 4
        jmp     NEAR $L$loop            ; jump over the alignment padding to the loop head

ALIGN   16
$L$loop:
        mov     edx,DWORD[r9]
        bswap   edx
        mov     ebp,DWORD[4+r9]
        mov     eax,r12d
        mov     DWORD[rsp],edx
        mov     ecx,esi
        bswap   ebp
        xor     eax,r11d
        rol     ecx,5
        and     eax,edi
        lea     r13d,[1518500249+r13*1+rdx]
        add     r13d,ecx
        xor     eax,r12d
        rol     edi,30
        add     r13d,eax
        mov     r14d,DWORD[8+r9]
        mov     eax,r11d
        mov     DWORD[4+rsp],ebp
        mov     ecx,r13d
        bswap   r14d
        xor     eax,edi
        rol     ecx,5
        and     eax,esi
        lea     r12d,[1518500249+r12*1+rbp]
        add     r12d,ecx
        xor     eax,r11d
        rol     esi,30
        add     r12d,eax
        mov     edx,DWORD[12+r9]
        mov     eax,edi
        mov     DWORD[8+rsp],r14d
        mov     ecx,r12d
        bswap   edx
        xor     eax,esi
        rol     ecx,5
        and     eax,r13d
        lea     r11d,[1518500249+r11*1+r14]
        add     r11d,ecx
        xor     eax,edi
        rol     r13d,30
        add     r11d,eax
        mov     ebp,DWORD[16+r9]
        mov     eax,esi
        mov     DWORD[12+rsp],edx
        mov     ecx,r11d
        bswap   ebp
        xor     eax,r13d
        rol     ecx,5
        and     eax,r12d
        lea     edi,[1518500249+rdi*1+rdx]
        add     edi,ecx
        xor     eax,esi
        rol     r12d,30
        add     edi,eax
        mov     r14d,DWORD[20+r9]
        mov     eax,r13d
        mov     DWORD[16+rsp],ebp
        mov     ecx,edi
        bswap   r14d
        xor     eax,r12d
        rol     ecx,5
        and     eax,r11d
        lea     esi,[1518500249+rsi*1+rbp]
        add     esi,ecx
        xor     eax,r13d
        rol     r11d,30
        add     esi,eax
        mov     edx,DWORD[24+r9]
        mov     eax,r12d
        mov     DWORD[20+rsp],r14d
        mov     ecx,esi
        bswap   edx
        xor     eax,r11d
        rol     ecx,5
        and     eax,edi
        lea     r13d,[1518500249+r13*1+r14]
        add     r13d,ecx
        xor     eax,r12d
        rol     edi,30
        add     r13d,eax
        mov     ebp,DWORD[28+r9]
        mov     eax,r11d
        mov     DWORD[24+rsp],edx
        mov     ecx,r13d
        bswap   ebp
        xor     eax,edi
        rol     ecx,5
        and     eax,esi
        lea     r12d,[1518500249+r12*1+rdx]
        add     r12d,ecx
        xor     eax,r11d
        rol     esi,30
        add     r12d,eax
        mov     r14d,DWORD[32+r9]
        mov     eax,edi
        mov     DWORD[28+rsp],ebp
        mov     ecx,r12d
        bswap   r14d
        xor     eax,esi
        rol     ecx,5
        and     eax,r13d
        lea     r11d,[1518500249+r11*1+rbp]
        add     r11d,ecx
        xor     eax,edi
        rol     r13d,30
        add     r11d,eax
        mov     edx,DWORD[36+r9]
        mov     eax,esi
        mov     DWORD[32+rsp],r14d
        mov     ecx,r11d
        bswap   edx
        xor     eax,r13d
        rol     ecx,5
        and     eax,r12d
        lea     edi,[1518500249+rdi*1+r14]
        add     edi,ecx
        xor     eax,esi
        rol     r12d,30
        add     edi,eax
        mov     ebp,DWORD[40+r9]
        mov     eax,r13d
        mov     DWORD[36+rsp],edx
        mov     ecx,edi
        bswap   ebp
        xor     eax,r12d
        rol     ecx,5
        and     eax,r11d
        lea     esi,[1518500249+rsi*1+rdx]
        add     esi,ecx
        xor     eax,r13d
        rol     r11d,30
        add     esi,eax
        mov     r14d,DWORD[44+r9]
        mov     eax,r12d
        mov     DWORD[40+rsp],ebp
        mov     ecx,esi
        bswap   r14d
        xor     eax,r11d
        rol     ecx,5
        and     eax,edi
        lea     r13d,[1518500249+r13*1+rbp]
        add     r13d,ecx
        xor     eax,r12d
        rol     edi,30
        add     r13d,eax
        mov     edx,DWORD[48+r9]
        mov     eax,r11d
        mov     DWORD[44+rsp],r14d
        mov     ecx,r13d
        bswap   edx
        xor     eax,edi
        rol     ecx,5
        and     eax,esi
        lea     r12d,[1518500249+r12*1+r14]
        add     r12d,ecx
        xor     eax,r11d
        rol     esi,30
        add     r12d,eax
        mov     ebp,DWORD[52+r9]
        mov     eax,edi
        mov     DWORD[48+rsp],edx
        mov     ecx,r12d
        bswap   ebp
        xor     eax,esi
        rol     ecx,5
        and     eax,r13d
        lea     r11d,[1518500249+r11*1+rdx]
        add     r11d,ecx
        xor     eax,edi
        rol     r13d,30
        add     r11d,eax
        mov     r14d,DWORD[56+r9]
        mov     eax,esi
        mov     DWORD[52+rsp],ebp
        mov     ecx,r11d
        bswap   r14d
        xor     eax,r13d
        rol     ecx,5
        and     eax,r12d
        lea     edi,[1518500249+rdi*1+rbp]
        add     edi,ecx
        xor     eax,esi
        rol     r12d,30
        add     edi,eax
        mov     edx,DWORD[60+r9]
        mov     eax,r13d
        mov     DWORD[56+rsp],r14d
        mov     ecx,edi
        bswap   edx
        xor     eax,r12d
        rol     ecx,5
        and     eax,r11d
        lea     esi,[1518500249+rsi*1+r14]
        add     esi,ecx
        xor     eax,r13d
        rol     r11d,30
        add     esi,eax
        xor     ebp,DWORD[rsp]
        mov     eax,r12d
        mov     DWORD[60+rsp],edx
        mov     ecx,esi
        xor     ebp,DWORD[8+rsp]
        xor     eax,r11d
        rol     ecx,5
        xor     ebp,DWORD[32+rsp]
        and     eax,edi
        lea     r13d,[1518500249+r13*1+rdx]
        rol     edi,30
        xor     eax,r12d
        add     r13d,ecx
        rol     ebp,1
        add     r13d,eax
        xor     r14d,DWORD[4+rsp]
        mov     eax,r11d
        mov     DWORD[rsp],ebp
        mov     ecx,r13d
        xor     r14d,DWORD[12+rsp]
        xor     eax,edi
        rol     ecx,5
        xor     r14d,DWORD[36+rsp]
        and     eax,esi
        lea     r12d,[1518500249+r12*1+rbp]
        rol     esi,30
        xor     eax,r11d
        add     r12d,ecx
        rol     r14d,1
        add     r12d,eax
        xor     edx,DWORD[8+rsp]
        mov     eax,edi
        mov     DWORD[4+rsp],r14d
        mov     ecx,r12d
        xor     edx,DWORD[16+rsp]
        xor     eax,esi
        rol     ecx,5
        xor     edx,DWORD[40+rsp]
        and     eax,r13d
        lea     r11d,[1518500249+r11*1+r14]
        rol     r13d,30
        xor     eax,edi
        add     r11d,ecx
        rol     edx,1
        add     r11d,eax
        xor     ebp,DWORD[12+rsp]
        mov     eax,esi
        mov     DWORD[8+rsp],edx
        mov     ecx,r11d
        xor     ebp,DWORD[20+rsp]
        xor     eax,r13d
        rol     ecx,5
        xor     ebp,DWORD[44+rsp]
        and     eax,r12d
        lea     edi,[1518500249+rdi*1+rdx]
        rol     r12d,30
        xor     eax,esi
        add     edi,ecx
        rol     ebp,1
        add     edi,eax
        xor     r14d,DWORD[16+rsp]
        mov     eax,r13d
        mov     DWORD[12+rsp],ebp
        mov     ecx,edi
        xor     r14d,DWORD[24+rsp]
        xor     eax,r12d
        rol     ecx,5
        xor     r14d,DWORD[48+rsp]
        and     eax,r11d
        lea     esi,[1518500249+rsi*1+rbp]
        rol     r11d,30
        xor     eax,r13d
        add     esi,ecx
        rol     r14d,1
        add     esi,eax
        xor     edx,DWORD[20+rsp]
        mov     eax,edi
        mov     DWORD[16+rsp],r14d
        mov     ecx,esi
        xor     edx,DWORD[28+rsp]
        xor     eax,r12d
        rol     ecx,5
        xor     edx,DWORD[52+rsp]
        lea     r13d,[1859775393+r13*1+r14]
        xor     eax,r11d
        add     r13d,ecx
        rol     edi,30
        add     r13d,eax
        rol     edx,1
        xor     ebp,DWORD[24+rsp]
        mov     eax,esi
        mov     DWORD[20+rsp],edx
        mov     ecx,r13d
        xor     ebp,DWORD[32+rsp]
        xor     eax,r11d
        rol     ecx,5
        xor     ebp,DWORD[56+rsp]
        lea     r12d,[1859775393+r12*1+rdx]
        xor     eax,edi
        add     r12d,ecx
        rol     esi,30
        add     r12d,eax
        rol     ebp,1
        xor     r14d,DWORD[28+rsp]
        mov     eax,r13d
        mov     DWORD[24+rsp],ebp
        mov     ecx,r12d
        xor     r14d,DWORD[36+rsp]
        xor     eax,edi
        rol     ecx,5
        xor     r14d,DWORD[60+rsp]
        lea     r11d,[1859775393+r11*1+rbp]
        xor     eax,esi
        add     r11d,ecx
        rol     r13d,30
        add     r11d,eax
        rol     r14d,1
        xor     edx,DWORD[32+rsp]
        mov     eax,r12d
        mov     DWORD[28+rsp],r14d
        mov     ecx,r11d
        xor     edx,DWORD[40+rsp]
        xor     eax,esi
        rol     ecx,5
        xor     edx,DWORD[rsp]
        lea     edi,[1859775393+rdi*1+r14]
        xor     eax,r13d
        add     edi,ecx
        rol     r12d,30
        add     edi,eax
        rol     edx,1
        xor     ebp,DWORD[36+rsp]
        mov     eax,r11d
        mov     DWORD[32+rsp],edx
        mov     ecx,edi
        xor     ebp,DWORD[44+rsp]
        xor     eax,r13d
        rol     ecx,5
        xor     ebp,DWORD[4+rsp]
        lea     esi,[1859775393+rsi*1+rdx]
        xor     eax,r12d
        add     esi,ecx
        rol     r11d,30
        add     esi,eax
        rol     ebp,1
        xor     r14d,DWORD[40+rsp]
        mov     eax,edi
        mov     DWORD[36+rsp],ebp
        mov     ecx,esi
        xor     r14d,DWORD[48+rsp]
        xor     eax,r12d
        rol     ecx,5
        xor     r14d,DWORD[8+rsp]
        lea     r13d,[1859775393+r13*1+rbp]
        xor     eax,r11d
        add     r13d,ecx
        rol     edi,30
        add     r13d,eax
        rol     r14d,1
        xor     edx,DWORD[44+rsp]
        mov     eax,esi
        mov     DWORD[40+rsp],r14d
        mov     ecx,r13d
        xor     edx,DWORD[52+rsp]
        xor     eax,r11d
        rol     ecx,5
        xor     edx,DWORD[12+rsp]
        lea     r12d,[1859775393+r12*1+r14]
        xor     eax,edi
        add     r12d,ecx
        rol     esi,30
        add     r12d,eax
        rol     edx,1
        xor     ebp,DWORD[48+rsp]
        mov     eax,r13d
        mov     DWORD[44+rsp],edx
        mov     ecx,r12d
        xor     ebp,DWORD[56+rsp]
        xor     eax,edi
        rol     ecx,5
        xor     ebp,DWORD[16+rsp]
        lea     r11d,[1859775393+r11*1+rdx]
        xor     eax,esi
        add     r11d,ecx
        rol     r13d,30
        add     r11d,eax
        rol     ebp,1
        xor     r14d,DWORD[52+rsp]
        mov     eax,r12d
        mov     DWORD[48+rsp],ebp
        mov     ecx,r11d
        xor     r14d,DWORD[60+rsp]
        xor     eax,esi
        rol     ecx,5
        xor     r14d,DWORD[20+rsp]
        lea     edi,[1859775393+rdi*1+rbp]
        xor     eax,r13d
        add     edi,ecx
        rol     r12d,30
        add     edi,eax
        rol     r14d,1
        xor     edx,DWORD[56+rsp]
        mov     eax,r11d
        mov     DWORD[52+rsp],r14d
        mov     ecx,edi
        xor     edx,DWORD[rsp]
        xor     eax,r13d
        rol     ecx,5
        xor     edx,DWORD[24+rsp]
        lea     esi,[1859775393+rsi*1+r14]
        xor     eax,r12d
        add     esi,ecx
        rol     r11d,30
        add     esi,eax
        rol     edx,1
        xor     ebp,DWORD[60+rsp]
        mov     eax,edi
        mov     DWORD[56+rsp],edx
        mov     ecx,esi
        xor     ebp,DWORD[4+rsp]
        xor     eax,r12d
        rol     ecx,5
        xor     ebp,DWORD[28+rsp]
        lea     r13d,[1859775393+r13*1+rdx]
        xor     eax,r11d
        add     r13d,ecx
        rol     edi,30
        add     r13d,eax
        rol     ebp,1
        xor     r14d,DWORD[rsp]
        mov     eax,esi
        mov     DWORD[60+rsp],ebp
        mov     ecx,r13d
        xor     r14d,DWORD[8+rsp]
        xor     eax,r11d
        rol     ecx,5
        xor     r14d,DWORD[32+rsp]
        lea     r12d,[1859775393+r12*1+rbp]
        xor     eax,edi
        add     r12d,ecx
        rol     esi,30
        add     r12d,eax
        rol     r14d,1
        xor     edx,DWORD[4+rsp]
        mov     eax,r13d
        mov     DWORD[rsp],r14d
        mov     ecx,r12d
        xor     edx,DWORD[12+rsp]
        xor     eax,edi
        rol     ecx,5
        xor     edx,DWORD[36+rsp]
        lea     r11d,[1859775393+r11*1+r14]
        xor     eax,esi
        add     r11d,ecx
        rol     r13d,30
        add     r11d,eax
        rol     edx,1
        xor     ebp,DWORD[8+rsp]
        mov     eax,r12d
        mov     DWORD[4+rsp],edx
        mov     ecx,r11d
        xor     ebp,DWORD[16+rsp]
        xor     eax,esi
        rol     ecx,5
        xor     ebp,DWORD[40+rsp]
        lea     edi,[1859775393+rdi*1+rdx]
        xor     eax,r13d
        add     edi,ecx
        rol     r12d,30
        add     edi,eax
        rol     ebp,1
        xor     r14d,DWORD[12+rsp]
        mov     eax,r11d
        mov     DWORD[8+rsp],ebp
        mov     ecx,edi
        xor     r14d,DWORD[20+rsp]
        xor     eax,r13d
        rol     ecx,5
        xor     r14d,DWORD[44+rsp]
        lea     esi,[1859775393+rsi*1+rbp]
        xor     eax,r12d
        add     esi,ecx
        rol     r11d,30
        add     esi,eax
        rol     r14d,1
        xor     edx,DWORD[16+rsp]
        mov     eax,edi
        mov     DWORD[12+rsp],r14d
        mov     ecx,esi
        xor     edx,DWORD[24+rsp]
        xor     eax,r12d
        rol     ecx,5
        xor     edx,DWORD[48+rsp]
        lea     r13d,[1859775393+r13*1+r14]
        xor     eax,r11d
        add     r13d,ecx
        rol     edi,30
        add     r13d,eax
        rol     edx,1
        xor     ebp,DWORD[20+rsp]
        mov     eax,esi
        mov     DWORD[16+rsp],edx
        mov     ecx,r13d
        xor     ebp,DWORD[28+rsp]
        xor     eax,r11d
        rol     ecx,5
        xor     ebp,DWORD[52+rsp]
        lea     r12d,[1859775393+r12*1+rdx]
        xor     eax,edi
        add     r12d,ecx
        rol     esi,30
        add     r12d,eax
        rol     ebp,1
        xor     r14d,DWORD[24+rsp]
        mov     eax,r13d
        mov     DWORD[20+rsp],ebp
        mov     ecx,r12d
        xor     r14d,DWORD[32+rsp]
        xor     eax,edi
        rol     ecx,5
        xor     r14d,DWORD[56+rsp]
        lea     r11d,[1859775393+r11*1+rbp]
        xor     eax,esi
        add     r11d,ecx
        rol     r13d,30
        add     r11d,eax
        rol     r14d,1
        xor     edx,DWORD[28+rsp]
        mov     eax,r12d
        mov     DWORD[24+rsp],r14d
        mov     ecx,r11d
        xor     edx,DWORD[36+rsp]
        xor     eax,esi
        rol     ecx,5
        xor     edx,DWORD[60+rsp]
        lea     edi,[1859775393+rdi*1+r14]
        xor     eax,r13d
        add     edi,ecx
        rol     r12d,30
        add     edi,eax
        rol     edx,1
        xor     ebp,DWORD[32+rsp]
        mov     eax,r11d
        mov     DWORD[28+rsp],edx
        mov     ecx,edi
        xor     ebp,DWORD[40+rsp]
        xor     eax,r13d
        rol     ecx,5
        xor     ebp,DWORD[rsp]
        lea     esi,[1859775393+rsi*1+rdx]
        xor     eax,r12d
        add     esi,ecx
        rol     r11d,30
        add     esi,eax
        rol     ebp,1
        xor     r14d,DWORD[36+rsp]
        mov     eax,r12d
        mov     DWORD[32+rsp],ebp
        mov     ebx,r12d
        xor     r14d,DWORD[44+rsp]
        and     eax,r11d
        mov     ecx,esi
        xor     r14d,DWORD[4+rsp]
        lea     r13d,[((-1894007588))+r13*1+rbp]
        xor     ebx,r11d
        rol     ecx,5
        add     r13d,eax
        rol     r14d,1
        and     ebx,edi
        add     r13d,ecx
        rol     edi,30
        add     r13d,ebx
        xor     edx,DWORD[40+rsp]
        mov     eax,r11d
        mov     DWORD[36+rsp],r14d
        mov     ebx,r11d
        xor     edx,DWORD[48+rsp]
        and     eax,edi
        mov     ecx,r13d
        xor     edx,DWORD[8+rsp]
        lea     r12d,[((-1894007588))+r12*1+r14]
        xor     ebx,edi
        rol     ecx,5
        add     r12d,eax
        rol     edx,1
        and     ebx,esi
        add     r12d,ecx
        rol     esi,30
        add     r12d,ebx
        xor     ebp,DWORD[44+rsp]
        mov     eax,edi
        mov     DWORD[40+rsp],edx
        mov     ebx,edi
        xor     ebp,DWORD[52+rsp]
        and     eax,esi
        mov     ecx,r12d
        xor     ebp,DWORD[12+rsp]
        lea     r11d,[((-1894007588))+r11*1+rdx]
        xor     ebx,esi
        rol     ecx,5
        add     r11d,eax
        rol     ebp,1
        and     ebx,r13d
        add     r11d,ecx
        rol     r13d,30
        add     r11d,ebx
        xor     r14d,DWORD[48+rsp]
        mov     eax,esi
        mov     DWORD[44+rsp],ebp
        mov     ebx,esi
        xor     r14d,DWORD[56+rsp]
        and     eax,r13d
        mov     ecx,r11d
        xor     r14d,DWORD[16+rsp]
        lea     edi,[((-1894007588))+rdi*1+rbp]
        xor     ebx,r13d
        rol     ecx,5
        add     edi,eax
        rol     r14d,1
        and     ebx,r12d
        add     edi,ecx
        rol     r12d,30
        add     edi,ebx
        xor     edx,DWORD[52+rsp]
        mov     eax,r13d
        mov     DWORD[48+rsp],r14d
        mov     ebx,r13d
        xor     edx,DWORD[60+rsp]
        and     eax,r12d
        mov     ecx,edi
        xor     edx,DWORD[20+rsp]
        lea     esi,[((-1894007588))+rsi*1+r14]
        xor     ebx,r12d
        rol     ecx,5
        add     esi,eax
        rol     edx,1
        and     ebx,r11d
        add     esi,ecx
        rol     r11d,30
        add     esi,ebx
        xor     ebp,DWORD[56+rsp]
        mov     eax,r12d
        mov     DWORD[52+rsp],edx
        mov     ebx,r12d
        xor     ebp,DWORD[rsp]
        and     eax,r11d
        mov     ecx,esi
        xor     ebp,DWORD[24+rsp]
        lea     r13d,[((-1894007588))+r13*1+rdx]
        xor     ebx,r11d
        rol     ecx,5
        add     r13d,eax
        rol     ebp,1
        and     ebx,edi
        add     r13d,ecx
        rol     edi,30
        add     r13d,ebx
        xor     r14d,DWORD[60+rsp]
        mov     eax,r11d
        mov     DWORD[56+rsp],ebp
        mov     ebx,r11d
        xor     r14d,DWORD[4+rsp]
        and     eax,edi
        mov     ecx,r13d
        xor     r14d,DWORD[28+rsp]
        lea     r12d,[((-1894007588))+r12*1+rbp]
        xor     ebx,edi
        rol     ecx,5
        add     r12d,eax
        rol     r14d,1
        and     ebx,esi
        add     r12d,ecx
        rol     esi,30
        add     r12d,ebx
        xor     edx,DWORD[rsp]
        mov     eax,edi
        mov     DWORD[60+rsp],r14d
        mov     ebx,edi
        xor     edx,DWORD[8+rsp]
        and     eax,esi
        mov     ecx,r12d
        xor     edx,DWORD[32+rsp]
        lea     r11d,[((-1894007588))+r11*1+r14]
        xor     ebx,esi
        rol     ecx,5
        add     r11d,eax
        rol     edx,1
        and     ebx,r13d
        add     r11d,ecx
        rol     r13d,30
        add     r11d,ebx
        xor     ebp,DWORD[4+rsp]
        mov     eax,esi
        mov     DWORD[rsp],edx
        mov     ebx,esi
        xor     ebp,DWORD[12+rsp]
        and     eax,r13d
        mov     ecx,r11d
        xor     ebp,DWORD[36+rsp]
        lea     edi,[((-1894007588))+rdi*1+rdx]
        xor     ebx,r13d
        rol     ecx,5
        add     edi,eax
        rol     ebp,1
        and     ebx,r12d
        add     edi,ecx
        rol     r12d,30
        add     edi,ebx
        xor     r14d,DWORD[8+rsp]
        mov     eax,r13d
        mov     DWORD[4+rsp],ebp
        mov     ebx,r13d
        xor     r14d,DWORD[16+rsp]
        and     eax,r12d
        mov     ecx,edi
        xor     r14d,DWORD[40+rsp]
        lea     esi,[((-1894007588))+rsi*1+rbp]
        xor     ebx,r12d
        rol     ecx,5
        add     esi,eax
        rol     r14d,1
        and     ebx,r11d
        add     esi,ecx
        rol     r11d,30
        add     esi,ebx
        xor     edx,DWORD[12+rsp]
        mov     eax,r12d
        mov     DWORD[8+rsp],r14d
        mov     ebx,r12d
        xor     edx,DWORD[20+rsp]
        and     eax,r11d
        mov     ecx,esi
        xor     edx,DWORD[44+rsp]
        lea     r13d,[((-1894007588))+r13*1+r14]
        xor     ebx,r11d
        rol     ecx,5
        add     r13d,eax
        rol     edx,1
        and     ebx,edi
        add     r13d,ecx
        rol     edi,30
        add     r13d,ebx
        xor     ebp,DWORD[16+rsp]
        mov     eax,r11d
        mov     DWORD[12+rsp],edx
        mov     ebx,r11d
        xor     ebp,DWORD[24+rsp]
        and     eax,edi
        mov     ecx,r13d
        xor     ebp,DWORD[48+rsp]
        lea     r12d,[((-1894007588))+r12*1+rdx]
        xor     ebx,edi
        rol     ecx,5
        add     r12d,eax
        rol     ebp,1
        and     ebx,esi
        add     r12d,ecx
        rol     esi,30
        add     r12d,ebx
        xor     r14d,DWORD[20+rsp]
        mov     eax,edi
        mov     DWORD[16+rsp],ebp
        mov     ebx,edi
        xor     r14d,DWORD[28+rsp]
        and     eax,esi
        mov     ecx,r12d
        xor     r14d,DWORD[52+rsp]
        lea     r11d,[((-1894007588))+r11*1+rbp]
        xor     ebx,esi
        rol     ecx,5
        add     r11d,eax
        rol     r14d,1
        and     ebx,r13d
        add     r11d,ecx
        rol     r13d,30
        add     r11d,ebx
        xor     edx,DWORD[24+rsp]
        mov     eax,esi
        mov     DWORD[20+rsp],r14d
        mov     ebx,esi
        xor     edx,DWORD[32+rsp]
        and     eax,r13d
        mov     ecx,r11d
        xor     edx,DWORD[56+rsp]
        lea     edi,[((-1894007588))+rdi*1+r14]
        xor     ebx,r13d
        rol     ecx,5
        add     edi,eax
        rol     edx,1
        and     ebx,r12d
        add     edi,ecx
        rol     r12d,30
        add     edi,ebx
        xor     ebp,DWORD[28+rsp]
        mov     eax,r13d
        mov     DWORD[24+rsp],edx
        mov     ebx,r13d
        xor     ebp,DWORD[36+rsp]
        and     eax,r12d
        mov     ecx,edi
        xor     ebp,DWORD[60+rsp]
        lea     esi,[((-1894007588))+rsi*1+rdx]
        xor     ebx,r12d
        rol     ecx,5
        add     esi,eax
        rol     ebp,1
        and     ebx,r11d
        add     esi,ecx
        rol     r11d,30
        add     esi,ebx
        xor     r14d,DWORD[32+rsp]
        mov     eax,r12d
        mov     DWORD[28+rsp],ebp
        mov     ebx,r12d
        xor     r14d,DWORD[40+rsp]
        and     eax,r11d
        mov     ecx,esi
        xor     r14d,DWORD[rsp]
        lea     r13d,[((-1894007588))+r13*1+rbp]
        xor     ebx,r11d
        rol     ecx,5
        add     r13d,eax
        rol     r14d,1
        and     ebx,edi
        add     r13d,ecx
        rol     edi,30
        add     r13d,ebx
        xor     edx,DWORD[36+rsp]
        mov     eax,r11d
        mov     DWORD[32+rsp],r14d
        mov     ebx,r11d
        xor     edx,DWORD[44+rsp]
        and     eax,edi
        mov     ecx,r13d
        xor     edx,DWORD[4+rsp]
        lea     r12d,[((-1894007588))+r12*1+r14]
        xor     ebx,edi
        rol     ecx,5
        add     r12d,eax
        rol     edx,1
        and     ebx,esi
        add     r12d,ecx
        rol     esi,30
        add     r12d,ebx
        xor     ebp,DWORD[40+rsp]
        mov     eax,edi
        mov     DWORD[36+rsp],edx
        mov     ebx,edi
        xor     ebp,DWORD[48+rsp]
        and     eax,esi
        mov     ecx,r12d
        xor     ebp,DWORD[8+rsp]
        lea     r11d,[((-1894007588))+r11*1+rdx]
        xor     ebx,esi
        rol     ecx,5
        add     r11d,eax
        rol     ebp,1
        and     ebx,r13d
        add     r11d,ecx
        rol     r13d,30
        add     r11d,ebx
        xor     r14d,DWORD[44+rsp]
        mov     eax,esi
        mov     DWORD[40+rsp],ebp
        mov     ebx,esi
        xor     r14d,DWORD[52+rsp]
        and     eax,r13d
        mov     ecx,r11d
        xor     r14d,DWORD[12+rsp]
        lea     edi,[((-1894007588))+rdi*1+rbp]
        xor     ebx,r13d
        rol     ecx,5
        add     edi,eax
        rol     r14d,1
        and     ebx,r12d
        add     edi,ecx
        rol     r12d,30
        add     edi,ebx
        xor     edx,DWORD[48+rsp]
        mov     eax,r13d
        mov     DWORD[44+rsp],r14d
        mov     ebx,r13d
        xor     edx,DWORD[56+rsp]
        and     eax,r12d
        mov     ecx,edi
        xor     edx,DWORD[16+rsp]
        lea     esi,[((-1894007588))+rsi*1+r14]
        xor     ebx,r12d
        rol     ecx,5
        add     esi,eax
        rol     edx,1
        and     ebx,r11d
        add     esi,ecx
        rol     r11d,30
        add     esi,ebx
        xor     ebp,DWORD[52+rsp]
        mov     eax,edi
        mov     DWORD[48+rsp],edx
        mov     ecx,esi
        xor     ebp,DWORD[60+rsp]
        xor     eax,r12d
        rol     ecx,5
        xor     ebp,DWORD[20+rsp]
        lea     r13d,[((-899497514))+r13*1+rdx]
        xor     eax,r11d
        add     r13d,ecx
        rol     edi,30
        add     r13d,eax
        rol     ebp,1
        xor     r14d,DWORD[56+rsp]
        mov     eax,esi
        mov     DWORD[52+rsp],ebp
        mov     ecx,r13d
        xor     r14d,DWORD[rsp]
        xor     eax,r11d
        rol     ecx,5
        xor     r14d,DWORD[24+rsp]
        lea     r12d,[((-899497514))+r12*1+rbp]
        xor     eax,edi
        add     r12d,ecx
        rol     esi,30
        add     r12d,eax
        rol     r14d,1
        xor     edx,DWORD[60+rsp]
        mov     eax,r13d
        mov     DWORD[56+rsp],r14d
        mov     ecx,r12d
        xor     edx,DWORD[4+rsp]
        xor     eax,edi
        rol     ecx,5
        xor     edx,DWORD[28+rsp]
        lea     r11d,[((-899497514))+r11*1+r14]
        xor     eax,esi
        add     r11d,ecx
        rol     r13d,30
        add     r11d,eax
        rol     edx,1
        xor     ebp,DWORD[rsp]
        mov     eax,r12d
        mov     DWORD[60+rsp],edx
        mov     ecx,r11d
        xor     ebp,DWORD[8+rsp]
        xor     eax,esi
        rol     ecx,5
        xor     ebp,DWORD[32+rsp]
        lea     edi,[((-899497514))+rdi*1+rdx]
        xor     eax,r13d
        add     edi,ecx
        rol     r12d,30
        add     edi,eax
        rol     ebp,1
        xor     r14d,DWORD[4+rsp]
        mov     eax,r11d
        mov     DWORD[rsp],ebp
        mov     ecx,edi
        xor     r14d,DWORD[12+rsp]
        xor     eax,r13d
        rol     ecx,5
        xor     r14d,DWORD[36+rsp]
        lea     esi,[((-899497514))+rsi*1+rbp]
        xor     eax,r12d
        add     esi,ecx
        rol     r11d,30
        add     esi,eax
        rol     r14d,1
        xor     edx,DWORD[8+rsp]
        mov     eax,edi
        mov     DWORD[4+rsp],r14d
        mov     ecx,esi
        xor     edx,DWORD[16+rsp]
        xor     eax,r12d
        rol     ecx,5
        xor     edx,DWORD[40+rsp]
        lea     r13d,[((-899497514))+r13*1+r14]
        xor     eax,r11d
        add     r13d,ecx
        rol     edi,30
        add     r13d,eax
        rol     edx,1
        xor     ebp,DWORD[12+rsp]
        mov     eax,esi
        mov     DWORD[8+rsp],edx
        mov     ecx,r13d
        xor     ebp,DWORD[20+rsp]
        xor     eax,r11d
        rol     ecx,5
        xor     ebp,DWORD[44+rsp]
        lea     r12d,[((-899497514))+r12*1+rdx]
        xor     eax,edi
        add     r12d,ecx
        rol     esi,30
        add     r12d,eax
        rol     ebp,1
        xor     r14d,DWORD[16+rsp]
        mov     eax,r13d
        mov     DWORD[12+rsp],ebp
        mov     ecx,r12d
        xor     r14d,DWORD[24+rsp]
        xor     eax,edi
        rol     ecx,5
        xor     r14d,DWORD[48+rsp]
        lea     r11d,[((-899497514))+r11*1+rbp]
        xor     eax,esi
        add     r11d,ecx
        rol     r13d,30
        add     r11d,eax
        rol     r14d,1
        xor     edx,DWORD[20+rsp]
        mov     eax,r12d
        mov     DWORD[16+rsp],r14d
        mov     ecx,r11d
        xor     edx,DWORD[28+rsp]
        xor     eax,esi
        rol     ecx,5
        xor     edx,DWORD[52+rsp]
        lea     edi,[((-899497514))+rdi*1+r14]
        xor     eax,r13d
        add     edi,ecx
        rol     r12d,30
        add     edi,eax
        rol     edx,1
        xor     ebp,DWORD[24+rsp]
        mov     eax,r11d
        mov     DWORD[20+rsp],edx
        mov     ecx,edi
        xor     ebp,DWORD[32+rsp]
        xor     eax,r13d
        rol     ecx,5
        xor     ebp,DWORD[56+rsp]
        lea     esi,[((-899497514))+rsi*1+rdx]
        xor     eax,r12d
        add     esi,ecx
        rol     r11d,30
        add     esi,eax
        rol     ebp,1
        xor     r14d,DWORD[28+rsp]
        mov     eax,edi
        mov     DWORD[24+rsp],ebp
        mov     ecx,esi
        xor     r14d,DWORD[36+rsp]
        xor     eax,r12d
        rol     ecx,5
        xor     r14d,DWORD[60+rsp]
        lea     r13d,[((-899497514))+r13*1+rbp]
        xor     eax,r11d
        add     r13d,ecx
        rol     edi,30
        add     r13d,eax
        rol     r14d,1
        xor     edx,DWORD[32+rsp]
        mov     eax,esi
        mov     DWORD[28+rsp],r14d
        mov     ecx,r13d
        xor     edx,DWORD[40+rsp]
        xor     eax,r11d
        rol     ecx,5
        xor     edx,DWORD[rsp]
        lea     r12d,[((-899497514))+r12*1+r14]
        xor     eax,edi
        add     r12d,ecx
        rol     esi,30
        add     r12d,eax
        rol     edx,1
        xor     ebp,DWORD[36+rsp]
        mov     eax,r13d

        mov     ecx,r12d
        xor     ebp,DWORD[44+rsp]
        xor     eax,edi
        rol     ecx,5
        xor     ebp,DWORD[4+rsp]
        lea     r11d,[((-899497514))+r11*1+rdx]
        xor     eax,esi
        add     r11d,ecx
        rol     r13d,30
        add     r11d,eax
        rol     ebp,1
        xor     r14d,DWORD[40+rsp]
        mov     eax,r12d

        mov     ecx,r11d
        xor     r14d,DWORD[48+rsp]
        xor     eax,esi
        rol     ecx,5
        xor     r14d,DWORD[8+rsp]
        lea     edi,[((-899497514))+rdi*1+rbp]
        xor     eax,r13d
        add     edi,ecx
        rol     r12d,30
        add     edi,eax
        rol     r14d,1
        xor     edx,DWORD[44+rsp]
        mov     eax,r11d

        mov     ecx,edi
        xor     edx,DWORD[52+rsp]
        xor     eax,r13d
        rol     ecx,5
        xor     edx,DWORD[12+rsp]
        lea     esi,[((-899497514))+rsi*1+r14]
        xor     eax,r12d
        add     esi,ecx
        rol     r11d,30
        add     esi,eax
        rol     edx,1
        xor     ebp,DWORD[48+rsp]
        mov     eax,edi

        mov     ecx,esi
        xor     ebp,DWORD[56+rsp]
        xor     eax,r12d
        rol     ecx,5
        xor     ebp,DWORD[16+rsp]
        lea     r13d,[((-899497514))+r13*1+rdx]
        xor     eax,r11d
        add     r13d,ecx
        rol     edi,30
        add     r13d,eax
        rol     ebp,1
        xor     r14d,DWORD[52+rsp]
        mov     eax,esi

        mov     ecx,r13d
        xor     r14d,DWORD[60+rsp]
        xor     eax,r11d
        rol     ecx,5
        xor     r14d,DWORD[20+rsp]
        lea     r12d,[((-899497514))+r12*1+rbp]
        xor     eax,edi
        add     r12d,ecx
        rol     esi,30
        add     r12d,eax
        rol     r14d,1
        xor     edx,DWORD[56+rsp]
        mov     eax,r13d

        mov     ecx,r12d
        xor     edx,DWORD[rsp]
        xor     eax,edi
        rol     ecx,5
        xor     edx,DWORD[24+rsp]
        lea     r11d,[((-899497514))+r11*1+r14]
        xor     eax,esi
        add     r11d,ecx
        rol     r13d,30
        add     r11d,eax
        rol     edx,1
        xor     ebp,DWORD[60+rsp]
        mov     eax,r12d

        mov     ecx,r11d
        xor     ebp,DWORD[4+rsp]
        xor     eax,esi
        rol     ecx,5
        xor     ebp,DWORD[28+rsp]
        lea     edi,[((-899497514))+rdi*1+rdx]
        xor     eax,r13d
        add     edi,ecx
        rol     r12d,30
        add     edi,eax
        rol     ebp,1
        mov     eax,r11d
        mov     ecx,edi
        xor     eax,r13d
        lea     esi,[((-899497514))+rsi*1+rbp]
        rol     ecx,5
        xor     eax,r12d
        add     esi,ecx
        rol     r11d,30
        add     esi,eax
        add     esi,DWORD[r8]
        add     edi,DWORD[4+r8]
        add     r11d,DWORD[8+r8]
        add     r12d,DWORD[12+r8]
        add     r13d,DWORD[16+r8]
        mov     DWORD[r8],esi
        mov     DWORD[4+r8],edi
        mov     DWORD[8+r8],r11d
        mov     DWORD[12+r8],r12d
        mov     DWORD[16+r8],r13d

        sub     r10,1
        lea     r9,[64+r9]
        jnz     NEAR $L$loop

        mov     rsi,QWORD[64+rsp]

        mov     r14,QWORD[((-40))+rsi]

        mov     r13,QWORD[((-32))+rsi]

        mov     r12,QWORD[((-24))+rsi]

        mov     rbp,QWORD[((-16))+rsi]

        mov     rbx,QWORD[((-8))+rsi]

        lea     rsp,[rsi]

$L$epilogue:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret

$L$SEH_end_sha1_block_data_order:

ALIGN   32
;-----------------------------------------------------------------------
; sha1_block_data_order_shaext
;
; SHA-1 compression of whole 64-byte blocks using the Intel SHA
; extensions (SHA1RNDS4 / SHA1NEXTE / SHA1MSG1 / SHA1MSG2).  The SHA and
; PSHUFB instructions are emitted as raw DB opcode bytes; each DB line
; below carries its decoded mnemonic.
;
; ABI:   Win64 entry.  rcx/rdx/r8 are copied to rdi/rsi/rdx, and the
;        caller's rdi/rsi are spilled to [8+rsp]/[16+rsp] and reloaded
;        in the epilogue.
; In:    rdi = state pointer (16 bytes at [rdi] + one dword at [16+rdi]
;              are read, updated and written back)
;        rsi = input pointer (64 bytes consumed per loop iteration,
;              loaded with unaligned movdqu)
;        rdx = block count.  It is decremented before it is tested, so
;              NOTE(review): a count of 0 is not guarded against here;
;              presumably callers never pass 0 - confirm.
; Saved: xmm6-xmm9 are stored in a 72-byte stack frame and restored.
; Uses:  rax (entry rsp), r8, xmm0-xmm9, flags.
;
; _shaext_shortcut is a second entry point: sha1_block_data_order jumps
; here after it has already performed the same prologue (rax = entry
; rsp, arguments already in rdi/rsi/rdx).
;
; Register roles inside the loop:
;   xmm0        working A,B,C,D
;   xmm1/xmm2   alternating E operand for SHA1RNDS4
;   xmm3        PSHUFB control mask loaded from K_XX_XX+160
;   xmm4-xmm7   message schedule, four dwords each
;   xmm8        A,B,C,D as they were at the top of the iteration
;   xmm9        E as it was at the top of the iteration
;-----------------------------------------------------------------------
sha1_block_data_order_shaext:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp                 ; rax = entry rsp; base for xmm save slots and final rsp restore
$L$SEH_begin_sha1_block_data_order_shaext:
        mov     rdi,rcx                 ; arg1 -> rdi (state pointer)
        mov     rsi,rdx                 ; arg2 -> rsi (input pointer)
        mov     rdx,r8                  ; arg3 -> rdx (block count)


_shaext_shortcut:

        ; Allocate 72 bytes and save xmm6-xmm9 at rax-72 .. rax-8.
        ; movaps needs 16-byte alignment; rax-72 is aligned when rsp = 8 (mod 16)
        ; at entry, which is what the Win64 calling convention provides.
        lea     rsp,[((-72))+rsp]
        movaps  XMMWORD[(-8-64)+rax],xmm6
        movaps  XMMWORD[(-8-48)+rax],xmm7
        movaps  XMMWORD[(-8-32)+rax],xmm8
        movaps  XMMWORD[(-8-16)+rax],xmm9
$L$prologue_shaext:
        movdqu  xmm0,XMMWORD[rdi]       ; first four state dwords
        movd    xmm1,DWORD[16+rdi]      ; fifth state dword into lane 0, other lanes zeroed
        movdqa  xmm3,XMMWORD[((K_XX_XX+160))] ; PSHUFB mask (table defined elsewhere; presumably a byte-reversal mask - confirm)

        movdqu  xmm4,XMMWORD[rsi]       ; input bytes 0..15
        pshufd  xmm0,xmm0,27            ; imm 0x1B reverses the order of the four dwords
        movdqu  xmm5,XMMWORD[16+rsi]    ; input bytes 16..31
        pshufd  xmm1,xmm1,27            ; moves the fifth state dword from lane 0 to lane 3
        movdqu  xmm6,XMMWORD[32+rsi]    ; input bytes 32..47
DB      102,15,56,0,227                 ; pshufb xmm4,xmm3
        movdqu  xmm7,XMMWORD[48+rsi]    ; input bytes 48..63
DB      102,15,56,0,235                 ; pshufb xmm5,xmm3
DB      102,15,56,0,243                 ; pshufb xmm6,xmm3
        movdqa  xmm9,xmm1               ; xmm9 = E at start of block (used for the final feed-forward)
DB      102,15,56,0,251                 ; pshufb xmm7,xmm3
        jmp     NEAR $L$oop_shaext

ALIGN   16
; One iteration = one 64-byte block = 20 x SHA1RNDS4 (4 rounds each).
; The SHA1RNDS4 immediate steps 0,1,2,3 with five uses of each value,
; i.e. one immediate per group of 20 rounds.
; The flags produced by "dec rdx" are consumed by "cmovne" below AND by
; the "jnz" at the bottom of the loop: no instruction in between writes
; EFLAGS, so nothing flag-modifying may be inserted into the loop body.
$L$oop_shaext:
        dec     rdx                     ; one block fewer; ZF=1 when this is the last block
        lea     r8,[64+rsi]             ; r8 = address of the next block (lea leaves flags intact)
        paddd   xmm1,xmm4               ; E lane += first schedule words (rounds 0-3 input)
        cmovne  rsi,r8                  ; advance input only if more blocks remain, so the
                                        ; loads near the loop end never read past the last block
        movdqa  xmm8,xmm0               ; keep A,B,C,D for the feed-forward add
DB      15,56,201,229                   ; sha1msg1 xmm4,xmm5
        movdqa  xmm2,xmm0
DB      15,58,204,193,0                 ; sha1rnds4 xmm0,xmm1,0   (rounds 0-3)
DB      15,56,200,213                   ; sha1nexte xmm2,xmm5
        pxor    xmm4,xmm6
DB      15,56,201,238                   ; sha1msg1 xmm5,xmm6
DB      15,56,202,231                   ; sha1msg2 xmm4,xmm7

        movdqa  xmm1,xmm0
DB      15,58,204,194,0                 ; sha1rnds4 xmm0,xmm2,0   (rounds 4-7)
DB      15,56,200,206                   ; sha1nexte xmm1,xmm6
        pxor    xmm5,xmm7
DB      15,56,202,236                   ; sha1msg2 xmm5,xmm4
DB      15,56,201,247                   ; sha1msg1 xmm6,xmm7
        movdqa  xmm2,xmm0
DB      15,58,204,193,0                 ; sha1rnds4 xmm0,xmm1,0   (rounds 8-11)
DB      15,56,200,215                   ; sha1nexte xmm2,xmm7
        pxor    xmm6,xmm4
DB      15,56,201,252                   ; sha1msg1 xmm7,xmm4
DB      15,56,202,245                   ; sha1msg2 xmm6,xmm5

        movdqa  xmm1,xmm0
DB      15,58,204,194,0                 ; sha1rnds4 xmm0,xmm2,0   (rounds 12-15)
DB      15,56,200,204                   ; sha1nexte xmm1,xmm4
        pxor    xmm7,xmm5
DB      15,56,202,254                   ; sha1msg2 xmm7,xmm6
DB      15,56,201,229                   ; sha1msg1 xmm4,xmm5
        movdqa  xmm2,xmm0
DB      15,58,204,193,0                 ; sha1rnds4 xmm0,xmm1,0   (rounds 16-19)
DB      15,56,200,213                   ; sha1nexte xmm2,xmm5
        pxor    xmm4,xmm6
DB      15,56,201,238                   ; sha1msg1 xmm5,xmm6
DB      15,56,202,231                   ; sha1msg2 xmm4,xmm7

        movdqa  xmm1,xmm0
DB      15,58,204,194,1                 ; sha1rnds4 xmm0,xmm2,1   (rounds 20-23)
DB      15,56,200,206                   ; sha1nexte xmm1,xmm6
        pxor    xmm5,xmm7
DB      15,56,202,236                   ; sha1msg2 xmm5,xmm4
DB      15,56,201,247                   ; sha1msg1 xmm6,xmm7
        movdqa  xmm2,xmm0
DB      15,58,204,193,1                 ; sha1rnds4 xmm0,xmm1,1   (rounds 24-27)
DB      15,56,200,215                   ; sha1nexte xmm2,xmm7
        pxor    xmm6,xmm4
DB      15,56,201,252                   ; sha1msg1 xmm7,xmm4
DB      15,56,202,245                   ; sha1msg2 xmm6,xmm5

        movdqa  xmm1,xmm0
DB      15,58,204,194,1                 ; sha1rnds4 xmm0,xmm2,1   (rounds 28-31)
DB      15,56,200,204                   ; sha1nexte xmm1,xmm4
        pxor    xmm7,xmm5
DB      15,56,202,254                   ; sha1msg2 xmm7,xmm6
DB      15,56,201,229                   ; sha1msg1 xmm4,xmm5
        movdqa  xmm2,xmm0
DB      15,58,204,193,1                 ; sha1rnds4 xmm0,xmm1,1   (rounds 32-35)
DB      15,56,200,213                   ; sha1nexte xmm2,xmm5
        pxor    xmm4,xmm6
DB      15,56,201,238                   ; sha1msg1 xmm5,xmm6
DB      15,56,202,231                   ; sha1msg2 xmm4,xmm7

        movdqa  xmm1,xmm0
DB      15,58,204,194,1                 ; sha1rnds4 xmm0,xmm2,1   (rounds 36-39)
DB      15,56,200,206                   ; sha1nexte xmm1,xmm6
        pxor    xmm5,xmm7
DB      15,56,202,236                   ; sha1msg2 xmm5,xmm4
DB      15,56,201,247                   ; sha1msg1 xmm6,xmm7
        movdqa  xmm2,xmm0
DB      15,58,204,193,2                 ; sha1rnds4 xmm0,xmm1,2   (rounds 40-43)
DB      15,56,200,215                   ; sha1nexte xmm2,xmm7
        pxor    xmm6,xmm4
DB      15,56,201,252                   ; sha1msg1 xmm7,xmm4
DB      15,56,202,245                   ; sha1msg2 xmm6,xmm5

        movdqa  xmm1,xmm0
DB      15,58,204,194,2                 ; sha1rnds4 xmm0,xmm2,2   (rounds 44-47)
DB      15,56,200,204                   ; sha1nexte xmm1,xmm4
        pxor    xmm7,xmm5
DB      15,56,202,254                   ; sha1msg2 xmm7,xmm6
DB      15,56,201,229                   ; sha1msg1 xmm4,xmm5
        movdqa  xmm2,xmm0
DB      15,58,204,193,2                 ; sha1rnds4 xmm0,xmm1,2   (rounds 48-51)
DB      15,56,200,213                   ; sha1nexte xmm2,xmm5
        pxor    xmm4,xmm6
DB      15,56,201,238                   ; sha1msg1 xmm5,xmm6
DB      15,56,202,231                   ; sha1msg2 xmm4,xmm7

        movdqa  xmm1,xmm0
DB      15,58,204,194,2                 ; sha1rnds4 xmm0,xmm2,2   (rounds 52-55)
DB      15,56,200,206                   ; sha1nexte xmm1,xmm6
        pxor    xmm5,xmm7
DB      15,56,202,236                   ; sha1msg2 xmm5,xmm4
DB      15,56,201,247                   ; sha1msg1 xmm6,xmm7
        movdqa  xmm2,xmm0
DB      15,58,204,193,2                 ; sha1rnds4 xmm0,xmm1,2   (rounds 56-59)
DB      15,56,200,215                   ; sha1nexte xmm2,xmm7
        pxor    xmm6,xmm4
DB      15,56,201,252                   ; sha1msg1 xmm7,xmm4
DB      15,56,202,245                   ; sha1msg2 xmm6,xmm5

        ; Final 20 rounds.  The schedule registers are no longer needed once
        ; consumed, so the next block's input is loaded and shuffled in
        ; between the round instructions.  rsi was advanced at the loop top
        ; only when another block remains; on the last block these loads
        ; re-read the current block and the result is discarded.
        movdqa  xmm1,xmm0
DB      15,58,204,194,3                 ; sha1rnds4 xmm0,xmm2,3   (rounds 60-63)
DB      15,56,200,204                   ; sha1nexte xmm1,xmm4
        pxor    xmm7,xmm5
DB      15,56,202,254                   ; sha1msg2 xmm7,xmm6
        movdqu  xmm4,XMMWORD[rsi]       ; next input bytes 0..15
        movdqa  xmm2,xmm0
DB      15,58,204,193,3                 ; sha1rnds4 xmm0,xmm1,3   (rounds 64-67)
DB      15,56,200,213                   ; sha1nexte xmm2,xmm5
        movdqu  xmm5,XMMWORD[16+rsi]    ; next input bytes 16..31
DB      102,15,56,0,227                 ; pshufb xmm4,xmm3

        movdqa  xmm1,xmm0
DB      15,58,204,194,3                 ; sha1rnds4 xmm0,xmm2,3   (rounds 68-71)
DB      15,56,200,206                   ; sha1nexte xmm1,xmm6
        movdqu  xmm6,XMMWORD[32+rsi]    ; next input bytes 32..47
DB      102,15,56,0,235                 ; pshufb xmm5,xmm3

        movdqa  xmm2,xmm0
DB      15,58,204,193,3                 ; sha1rnds4 xmm0,xmm1,3   (rounds 72-75)
DB      15,56,200,215                   ; sha1nexte xmm2,xmm7
        movdqu  xmm7,XMMWORD[48+rsi]    ; next input bytes 48..63
DB      102,15,56,0,243                 ; pshufb xmm6,xmm3

        movdqa  xmm1,xmm0
DB      15,58,204,194,3                 ; sha1rnds4 xmm0,xmm2,3   (rounds 76-79)
DB      65,15,56,200,201                ; sha1nexte xmm1,xmm9  (REX.B prefix selects xmm9): folds in the E saved at block start
DB      102,15,56,0,251                 ; pshufb xmm7,xmm3

        paddd   xmm0,xmm8               ; feed-forward: add A,B,C,D saved at the loop top
        movdqa  xmm9,xmm1               ; updated E becomes the saved E for the next block

        jnz     NEAR $L$oop_shaext      ; ZF still comes from "dec rdx" at the loop top

        pshufd  xmm0,xmm0,27            ; undo the dword reversal done in the prologue
        pshufd  xmm1,xmm1,27            ; bring E back from lane 3 to lane 0 for movd
        movdqu  XMMWORD[rdi],xmm0       ; write back first four state dwords
        movd    DWORD[16+rdi],xmm1      ; write back fifth state dword
        movaps  xmm6,XMMWORD[((-8-64))+rax] ; restore the xmm registers saved in the prologue
        movaps  xmm7,XMMWORD[((-8-48))+rax]
        movaps  xmm8,XMMWORD[((-8-32))+rax]
        movaps  xmm9,XMMWORD[((-8-16))+rax]
        mov     rsp,rax                 ; release the 72-byte frame (rax = entry rsp)
$L$epilogue_shaext:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret

$L$SEH_end_sha1_block_data_order_shaext:

ALIGN   16
sha1_block_data_order_ssse3:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_sha1_block_data_order_ssse3:
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8


_ssse3_shortcut:

        mov     r11,rsp

        push    rbx

        push    rbp

        push    r12

        push    r13

        push    r14

        lea     rsp,[((-160))+rsp]
        movaps  XMMWORD[(-40-96)+r11],xmm6
        movaps  XMMWORD[(-40-80)+r11],xmm7
        movaps  XMMWORD[(-40-64)+r11],xmm8
        movaps  XMMWORD[(-40-48)+r11],xmm9
        movaps  XMMWORD[(-40-32)+r11],xmm10
        movaps  XMMWORD[(-40-16)+r11],xmm11
$L$prologue_ssse3:
        and     rsp,-64
        mov     r8,rdi
        mov     r9,rsi
        mov     r10,rdx

        shl     r10,6
        add     r10,r9
        lea     r14,[((K_XX_XX+64))]

        mov     eax,DWORD[r8]
        mov     ebx,DWORD[4+r8]
        mov     ecx,DWORD[8+r8]
        mov     edx,DWORD[12+r8]
        mov     esi,ebx
        mov     ebp,DWORD[16+r8]
        mov     edi,ecx
        xor     edi,edx
        and     esi,edi

        movdqa  xmm6,XMMWORD[64+r14]
        movdqa  xmm9,XMMWORD[((-64))+r14]
        movdqu  xmm0,XMMWORD[r9]
        movdqu  xmm1,XMMWORD[16+r9]
        movdqu  xmm2,XMMWORD[32+r9]
        movdqu  xmm3,XMMWORD[48+r9]
DB      102,15,56,0,198
DB      102,15,56,0,206
DB      102,15,56,0,214
        add     r9,64
        paddd   xmm0,xmm9
DB      102,15,56,0,222
        paddd   xmm1,xmm9
        paddd   xmm2,xmm9
        movdqa  XMMWORD[rsp],xmm0
        psubd   xmm0,xmm9
        movdqa  XMMWORD[16+rsp],xmm1
        psubd   xmm1,xmm9
        movdqa  XMMWORD[32+rsp],xmm2
        psubd   xmm2,xmm9
        jmp     NEAR $L$oop_ssse3
ALIGN   16
$L$oop_ssse3:
        ror     ebx,2
        pshufd  xmm4,xmm0,238
        xor     esi,edx
        movdqa  xmm8,xmm3
        paddd   xmm9,xmm3
        mov     edi,eax
        add     ebp,DWORD[rsp]
        punpcklqdq      xmm4,xmm1
        xor     ebx,ecx
        rol     eax,5
        add     ebp,esi
        psrldq  xmm8,4
        and     edi,ebx
        xor     ebx,ecx
        pxor    xmm4,xmm0
        add     ebp,eax
        ror     eax,7
        pxor    xmm8,xmm2
        xor     edi,ecx
        mov     esi,ebp
        add     edx,DWORD[4+rsp]
        pxor    xmm4,xmm8
        xor     eax,ebx
        rol     ebp,5
        movdqa  XMMWORD[48+rsp],xmm9
        add     edx,edi
        and     esi,eax
        movdqa  xmm10,xmm4
        xor     eax,ebx
        add     edx,ebp
        ror     ebp,7
        movdqa  xmm8,xmm4
        xor     esi,ebx
        pslldq  xmm10,12
        paddd   xmm4,xmm4
        mov     edi,edx
        add     ecx,DWORD[8+rsp]
        psrld   xmm8,31
        xor     ebp,eax
        rol     edx,5
        add     ecx,esi
        movdqa  xmm9,xmm10
        and     edi,ebp
        xor     ebp,eax
        psrld   xmm10,30
        add     ecx,edx
        ror     edx,7
        por     xmm4,xmm8
        xor     edi,eax
        mov     esi,ecx
        add     ebx,DWORD[12+rsp]
        pslld   xmm9,2
        pxor    xmm4,xmm10
        xor     edx,ebp
        movdqa  xmm10,XMMWORD[((-64))+r14]
        rol     ecx,5
        add     ebx,edi
        and     esi,edx
        pxor    xmm4,xmm9
        xor     edx,ebp
        add     ebx,ecx
        ror     ecx,7
        pshufd  xmm5,xmm1,238
        xor     esi,ebp
        movdqa  xmm9,xmm4
        paddd   xmm10,xmm4
        mov     edi,ebx
        add     eax,DWORD[16+rsp]
        punpcklqdq      xmm5,xmm2
        xor     ecx,edx
        rol     ebx,5
        add     eax,esi
        psrldq  xmm9,4
        and     edi,ecx
        xor     ecx,edx
        pxor    xmm5,xmm1
        add     eax,ebx
        ror     ebx,7
        pxor    xmm9,xmm3
        xor     edi,edx
        mov     esi,eax
        add     ebp,DWORD[20+rsp]
        pxor    xmm5,xmm9
        xor     ebx,ecx
        rol     eax,5
        movdqa  XMMWORD[rsp],xmm10
        add     ebp,edi
        and     esi,ebx
        movdqa  xmm8,xmm5
        xor     ebx,ecx
        add     ebp,eax
        ror     eax,7
        movdqa  xmm9,xmm5
        xor     esi,ecx
        pslldq  xmm8,12
        paddd   xmm5,xmm5
        mov     edi,ebp
        add     edx,DWORD[24+rsp]
        psrld   xmm9,31
        xor     eax,ebx
        rol     ebp,5
        add     edx,esi
        movdqa  xmm10,xmm8
        and     edi,eax
        xor     eax,ebx
        psrld   xmm8,30
        add     edx,ebp
        ror     ebp,7
        por     xmm5,xmm9
        xor     edi,ebx
        mov     esi,edx
        add     ecx,DWORD[28+rsp]
        pslld   xmm10,2
        pxor    xmm5,xmm8
        xor     ebp,eax
        movdqa  xmm8,XMMWORD[((-32))+r14]
        rol     edx,5
        add     ecx,edi
        and     esi,ebp
        pxor    xmm5,xmm10
        xor     ebp,eax
        add     ecx,edx
        ror     edx,7
        pshufd  xmm6,xmm2,238
        xor     esi,eax
        movdqa  xmm10,xmm5
        paddd   xmm8,xmm5
        mov     edi,ecx
        add     ebx,DWORD[32+rsp]
        punpcklqdq      xmm6,xmm3
        xor     edx,ebp
        rol     ecx,5
        add     ebx,esi
        psrldq  xmm10,4
        and     edi,edx
        xor     edx,ebp
        pxor    xmm6,xmm2
        add     ebx,ecx
        ror     ecx,7
        pxor    xmm10,xmm4
        xor     edi,ebp
        mov     esi,ebx
        add     eax,DWORD[36+rsp]
        pxor    xmm6,xmm10
        xor     ecx,edx
        rol     ebx,5
        movdqa  XMMWORD[16+rsp],xmm8
        add     eax,edi
        and     esi,ecx
        movdqa  xmm9,xmm6
        xor     ecx,edx
        add     eax,ebx
        ror     ebx,7
        movdqa  xmm10,xmm6
        xor     esi,edx
        pslldq  xmm9,12
        paddd   xmm6,xmm6
        mov     edi,eax
        add     ebp,DWORD[40+rsp]
        psrld   xmm10,31
        xor     ebx,ecx
        rol     eax,5
        add     ebp,esi
        movdqa  xmm8,xmm9
        and     edi,ebx
        xor     ebx,ecx
        psrld   xmm9,30
        add     ebp,eax
        ror     eax,7
        por     xmm6,xmm10
        xor     edi,ecx
        mov     esi,ebp
        add     edx,DWORD[44+rsp]
        pslld   xmm8,2
        pxor    xmm6,xmm9
        xor     eax,ebx
        movdqa  xmm9,XMMWORD[((-32))+r14]
        rol     ebp,5
        add     edx,edi
        and     esi,eax
        pxor    xmm6,xmm8
        xor     eax,ebx
        add     edx,ebp
        ror     ebp,7
        pshufd  xmm7,xmm3,238
        xor     esi,ebx
        movdqa  xmm8,xmm6
        paddd   xmm9,xmm6
        mov     edi,edx
        add     ecx,DWORD[48+rsp]
        punpcklqdq      xmm7,xmm4
        xor     ebp,eax
        rol     edx,5
        add     ecx,esi
        psrldq  xmm8,4
        and     edi,ebp
        xor     ebp,eax
        pxor    xmm7,xmm3
        add     ecx,edx
        ror     edx,7
        pxor    xmm8,xmm5
        xor     edi,eax
        mov     esi,ecx
        add     ebx,DWORD[52+rsp]
        pxor    xmm7,xmm8
        xor     edx,ebp
        rol     ecx,5
        movdqa  XMMWORD[32+rsp],xmm9
        add     ebx,edi
        and     esi,edx
        movdqa  xmm10,xmm7
        xor     edx,ebp
        add     ebx,ecx
        ror     ecx,7
        movdqa  xmm8,xmm7
        xor     esi,ebp
        pslldq  xmm10,12
        paddd   xmm7,xmm7
        mov     edi,ebx
        add     eax,DWORD[56+rsp]
        psrld   xmm8,31
        xor     ecx,edx
        rol     ebx,5
        add     eax,esi
        movdqa  xmm9,xmm10
        and     edi,ecx
        xor     ecx,edx
        psrld   xmm10,30
        add     eax,ebx
        ror     ebx,7
        por     xmm7,xmm8
        xor     edi,edx
        mov     esi,eax
        add     ebp,DWORD[60+rsp]
        pslld   xmm9,2
        pxor    xmm7,xmm10
        xor     ebx,ecx
        movdqa  xmm10,XMMWORD[((-32))+r14]
        rol     eax,5
        add     ebp,edi
        and     esi,ebx
        pxor    xmm7,xmm9
        pshufd  xmm9,xmm6,238
        xor     ebx,ecx
        add     ebp,eax
        ror     eax,7
        pxor    xmm0,xmm4
        xor     esi,ecx
        mov     edi,ebp
        add     edx,DWORD[rsp]
        punpcklqdq      xmm9,xmm7
        xor     eax,ebx
        rol     ebp,5
        pxor    xmm0,xmm1
        add     edx,esi
        and     edi,eax
        movdqa  xmm8,xmm10
        xor     eax,ebx
        paddd   xmm10,xmm7
        add     edx,ebp
        pxor    xmm0,xmm9
        ror     ebp,7
        xor     edi,ebx
        mov     esi,edx
        add     ecx,DWORD[4+rsp]
        movdqa  xmm9,xmm0
        xor     ebp,eax
        rol     edx,5
        movdqa  XMMWORD[48+rsp],xmm10
        add     ecx,edi
        and     esi,ebp
        xor     ebp,eax
        pslld   xmm0,2
        add     ecx,edx
        ror     edx,7
        psrld   xmm9,30
        xor     esi,eax
        mov     edi,ecx
        add     ebx,DWORD[8+rsp]
        por     xmm0,xmm9
        xor     edx,ebp
        rol     ecx,5
        pshufd  xmm10,xmm7,238
        add     ebx,esi
        and     edi,edx
        xor     edx,ebp
        add     ebx,ecx
        add     eax,DWORD[12+rsp]
        xor     edi,ebp
        mov     esi,ebx
        rol     ebx,5
        add     eax,edi
        xor     esi,edx
        ror     ecx,7
        add     eax,ebx
        pxor    xmm1,xmm5
        add     ebp,DWORD[16+rsp]
        xor     esi,ecx
        punpcklqdq      xmm10,xmm0
        mov     edi,eax
        rol     eax,5
        pxor    xmm1,xmm2
        add     ebp,esi
        xor     edi,ecx
        movdqa  xmm9,xmm8
        ror     ebx,7
        paddd   xmm8,xmm0
        add     ebp,eax
        pxor    xmm1,xmm10
        add     edx,DWORD[20+rsp]
        xor     edi,ebx
        mov     esi,ebp
        rol     ebp,5
        movdqa  xmm10,xmm1
        add     edx,edi
        xor     esi,ebx
        movdqa  XMMWORD[rsp],xmm8
        ror     eax,7
        add     edx,ebp
        add     ecx,DWORD[24+rsp]
        pslld   xmm1,2
        xor     esi,eax
        mov     edi,edx
        psrld   xmm10,30
        rol     edx,5
        add     ecx,esi
        xor     edi,eax
        ror     ebp,7
        por     xmm1,xmm10
        add     ecx,edx
        add     ebx,DWORD[28+rsp]
        pshufd  xmm8,xmm0,238
        xor     edi,ebp
        mov     esi,ecx
        rol     ecx,5
        add     ebx,edi
        xor     esi,ebp
        ror     edx,7
        add     ebx,ecx
        pxor    xmm2,xmm6
        add     eax,DWORD[32+rsp]
        xor     esi,edx
        punpcklqdq      xmm8,xmm1
        mov     edi,ebx
        rol     ebx,5
        pxor    xmm2,xmm3
        add     eax,esi
        xor     edi,edx
        movdqa  xmm10,XMMWORD[r14]
        ror     ecx,7
        paddd   xmm9,xmm1
        add     eax,ebx
        pxor    xmm2,xmm8
        add     ebp,DWORD[36+rsp]
        xor     edi,ecx
        mov     esi,eax
        rol     eax,5
        movdqa  xmm8,xmm2
        add     ebp,edi
        xor     esi,ecx
        movdqa  XMMWORD[16+rsp],xmm9
        ror     ebx,7
        add     ebp,eax
        add     edx,DWORD[40+rsp]
        pslld   xmm2,2
        xor     esi,ebx
        mov     edi,ebp
        psrld   xmm8,30
        rol     ebp,5
        add     edx,esi
        xor     edi,ebx
        ror     eax,7
        por     xmm2,xmm8
        add     edx,ebp
        add     ecx,DWORD[44+rsp]
        pshufd  xmm9,xmm1,238
        xor     edi,eax
        mov     esi,edx
        rol     edx,5
        add     ecx,edi
        xor     esi,eax
        ror     ebp,7
        add     ecx,edx
        pxor    xmm3,xmm7
        add     ebx,DWORD[48+rsp]
        xor     esi,ebp
        punpcklqdq      xmm9,xmm2
        mov     edi,ecx
        rol     ecx,5
        pxor    xmm3,xmm4
        add     ebx,esi
        xor     edi,ebp
        movdqa  xmm8,xmm10
        ror     edx,7
        paddd   xmm10,xmm2
        add     ebx,ecx
        pxor    xmm3,xmm9
        add     eax,DWORD[52+rsp]
        xor     edi,edx
        mov     esi,ebx
        rol     ebx,5
        movdqa  xmm9,xmm3
        add     eax,edi
        xor     esi,edx
        movdqa  XMMWORD[32+rsp],xmm10
        ror     ecx,7
        add     eax,ebx
        add     ebp,DWORD[56+rsp]
        pslld   xmm3,2
        xor     esi,ecx
        mov     edi,eax
        psrld   xmm9,30
        rol     eax,5
        add     ebp,esi
        xor     edi,ecx
        ror     ebx,7
        por     xmm3,xmm9
        add     ebp,eax
        add     edx,DWORD[60+rsp]
        pshufd  xmm10,xmm2,238
        xor     edi,ebx
        mov     esi,ebp
        rol     ebp,5
        add     edx,edi
        xor     esi,ebx
        ror     eax,7
        add     edx,ebp
        pxor    xmm4,xmm0
        add     ecx,DWORD[rsp]
        xor     esi,eax
        punpcklqdq      xmm10,xmm3
        mov     edi,edx
        rol     edx,5
        pxor    xmm4,xmm5
        add     ecx,esi
        xor     edi,eax
        movdqa  xmm9,xmm8
        ror     ebp,7
        paddd   xmm8,xmm3
        add     ecx,edx
        pxor    xmm4,xmm10
        add     ebx,DWORD[4+rsp]
        xor     edi,ebp
        mov     esi,ecx
        rol     ecx,5
        movdqa  xmm10,xmm4
        add     ebx,edi
        xor     esi,ebp
        movdqa  XMMWORD[48+rsp],xmm8
        ror     edx,7
        add     ebx,ecx
        add     eax,DWORD[8+rsp]
        pslld   xmm4,2
        xor     esi,edx
        mov     edi,ebx
        psrld   xmm10,30
        rol     ebx,5
        add     eax,esi
        xor     edi,edx
        ror     ecx,7
        por     xmm4,xmm10
        add     eax,ebx
        add     ebp,DWORD[12+rsp]
        pshufd  xmm8,xmm3,238
        xor     edi,ecx
        mov     esi,eax
        rol     eax,5
        add     ebp,edi
        xor     esi,ecx
        ror     ebx,7
        add     ebp,eax
        pxor    xmm5,xmm1
        add     edx,DWORD[16+rsp]
        xor     esi,ebx
        punpcklqdq      xmm8,xmm4
        mov     edi,ebp
        rol     ebp,5
        pxor    xmm5,xmm6
        add     edx,esi
        xor     edi,ebx
        movdqa  xmm10,xmm9
        ror     eax,7
        paddd   xmm9,xmm4
        add     edx,ebp
        pxor    xmm5,xmm8
        add     ecx,DWORD[20+rsp]
        xor     edi,eax
        mov     esi,edx
        rol     edx,5
        movdqa  xmm8,xmm5
        add     ecx,edi
        xor     esi,eax
        movdqa  XMMWORD[rsp],xmm9
        ror     ebp,7
        add     ecx,edx
        add     ebx,DWORD[24+rsp]
        pslld   xmm5,2
        xor     esi,ebp
        mov     edi,ecx
        psrld   xmm8,30
        rol     ecx,5
        add     ebx,esi
        xor     edi,ebp
        ror     edx,7
        por     xmm5,xmm8
        add     ebx,ecx
        add     eax,DWORD[28+rsp]
        pshufd  xmm9,xmm4,238
        ror     ecx,7
        mov     esi,ebx
        xor     edi,edx
        rol     ebx,5
        add     eax,edi
        xor     esi,ecx
        xor     ecx,edx
        add     eax,ebx
        pxor    xmm6,xmm2
        add     ebp,DWORD[32+rsp]
        and     esi,ecx
        xor     ecx,edx
        ror     ebx,7
        punpcklqdq      xmm9,xmm5
        mov     edi,eax
        xor     esi,ecx
        pxor    xmm6,xmm7
        rol     eax,5
        add     ebp,esi
        movdqa  xmm8,xmm10
        xor     edi,ebx
        paddd   xmm10,xmm5
        xor     ebx,ecx
        pxor    xmm6,xmm9
        add     ebp,eax
        add     edx,DWORD[36+rsp]
        and     edi,ebx
        xor     ebx,ecx
        ror     eax,7
        movdqa  xmm9,xmm6
        mov     esi,ebp
        xor     edi,ebx
        movdqa  XMMWORD[16+rsp],xmm10
        rol     ebp,5
        add     edx,edi
        xor     esi,eax
        pslld   xmm6,2
        xor     eax,ebx
        add     edx,ebp
        psrld   xmm9,30
        add     ecx,DWORD[40+rsp]
        and     esi,eax
        xor     eax,ebx
        por     xmm6,xmm9
        ror     ebp,7
        mov     edi,edx
        xor     esi,eax
        rol     edx,5
        pshufd  xmm10,xmm5,238
        add     ecx,esi
        xor     edi,ebp
        xor     ebp,eax
        add     ecx,edx
        add     ebx,DWORD[44+rsp]
        and     edi,ebp
        xor     ebp,eax
        ror     edx,7
        mov     esi,ecx
        xor     edi,ebp
        rol     ecx,5
        add     ebx,edi
        xor     esi,edx
        xor     edx,ebp
        add     ebx,ecx
        pxor    xmm7,xmm3
        add     eax,DWORD[48+rsp]
        and     esi,edx
        xor     edx,ebp
        ror     ecx,7
        punpcklqdq      xmm10,xmm6
        mov     edi,ebx
        xor     esi,edx
        pxor    xmm7,xmm0
        rol     ebx,5
        add     eax,esi
        movdqa  xmm9,XMMWORD[32+r14]
        xor     edi,ecx
        paddd   xmm8,xmm6
        xor     ecx,edx
        pxor    xmm7,xmm10
        add     eax,ebx
        add     ebp,DWORD[52+rsp]
        and     edi,ecx
        xor     ecx,edx
        ror     ebx,7
        movdqa  xmm10,xmm7
        mov     esi,eax
        xor     edi,ecx
        movdqa  XMMWORD[32+rsp],xmm8
        rol     eax,5
        add     ebp,edi
        xor     esi,ebx
        pslld   xmm7,2
        xor     ebx,ecx
        add     ebp,eax
        psrld   xmm10,30
        add     edx,DWORD[56+rsp]
        and     esi,ebx
        xor     ebx,ecx
        por     xmm7,xmm10
        ror     eax,7
        mov     edi,ebp
        xor     esi,ebx
        rol     ebp,5
        pshufd  xmm8,xmm6,238
        add     edx,esi
        xor     edi,eax
        xor     eax,ebx
        add     edx,ebp
        add     ecx,DWORD[60+rsp]
        and     edi,eax
        xor     eax,ebx
        ror     ebp,7
        mov     esi,edx
        xor     edi,eax
        rol     edx,5
        add     ecx,edi
        xor     esi,ebp
        xor     ebp,eax
        add     ecx,edx
        pxor    xmm0,xmm4
        add     ebx,DWORD[rsp]
        and     esi,ebp
        xor     ebp,eax
        ror     edx,7
        punpcklqdq      xmm8,xmm7
        mov     edi,ecx
        xor     esi,ebp
        pxor    xmm0,xmm1
        rol     ecx,5
        add     ebx,esi
        movdqa  xmm10,xmm9
        xor     edi,edx
        paddd   xmm9,xmm7
        xor     edx,ebp
        pxor    xmm0,xmm8
        add     ebx,ecx
        add     eax,DWORD[4+rsp]
        and     edi,edx
        xor     edx,ebp
        ror     ecx,7
        movdqa  xmm8,xmm0
        mov     esi,ebx
        xor     edi,edx
        movdqa  XMMWORD[48+rsp],xmm9
        rol     ebx,5
        add     eax,edi
        xor     esi,ecx
        pslld   xmm0,2
        xor     ecx,edx
        add     eax,ebx
        psrld   xmm8,30
        add     ebp,DWORD[8+rsp]
        and     esi,ecx
        xor     ecx,edx
        por     xmm0,xmm8
        ror     ebx,7
        mov     edi,eax
        xor     esi,ecx
        rol     eax,5
        pshufd  xmm9,xmm7,238
        add     ebp,esi
        xor     edi,ebx
        xor     ebx,ecx
        add     ebp,eax
        add     edx,DWORD[12+rsp]
        and     edi,ebx
        xor     ebx,ecx
        ror     eax,7
        mov     esi,ebp
        xor     edi,ebx
        rol     ebp,5
        add     edx,edi
        xor     esi,eax
        xor     eax,ebx
        add     edx,ebp
        pxor    xmm1,xmm5
        add     ecx,DWORD[16+rsp]
        and     esi,eax
        xor     eax,ebx
        ror     ebp,7
        punpcklqdq      xmm9,xmm0
        mov     edi,edx
        xor     esi,eax
        pxor    xmm1,xmm2
        rol     edx,5
        add     ecx,esi
        movdqa  xmm8,xmm10
        xor     edi,ebp
        paddd   xmm10,xmm0
        xor     ebp,eax
        pxor    xmm1,xmm9
        add     ecx,edx
        add     ebx,DWORD[20+rsp]
        and     edi,ebp
        xor     ebp,eax
        ror     edx,7
        movdqa  xmm9,xmm1
        mov     esi,ecx
        xor     edi,ebp
        movdqa  XMMWORD[rsp],xmm10
        rol     ecx,5
        add     ebx,edi
        xor     esi,edx
        pslld   xmm1,2
        xor     edx,ebp
        add     ebx,ecx
        psrld   xmm9,30
        add     eax,DWORD[24+rsp]
        and     esi,edx
        xor     edx,ebp
        por     xmm1,xmm9
        ror     ecx,7
        mov     edi,ebx
        xor     esi,edx
        rol     ebx,5
        pshufd  xmm10,xmm0,238
        add     eax,esi
        xor     edi,ecx
        xor     ecx,edx
        add     eax,ebx
        add     ebp,DWORD[28+rsp]
        and     edi,ecx
        xor     ecx,edx
        ror     ebx,7
        mov     esi,eax
        xor     edi,ecx
        rol     eax,5
        add     ebp,edi
        xor     esi,ebx
        xor     ebx,ecx
        add     ebp,eax
        pxor    xmm2,xmm6
        add     edx,DWORD[32+rsp]
        and     esi,ebx
        xor     ebx,ecx
        ror     eax,7
        punpcklqdq      xmm10,xmm1
        mov     edi,ebp
        xor     esi,ebx
        pxor    xmm2,xmm3
        rol     ebp,5
        add     edx,esi
        movdqa  xmm9,xmm8
        xor     edi,eax
        paddd   xmm8,xmm1
        xor     eax,ebx
        pxor    xmm2,xmm10
        add     edx,ebp
        add     ecx,DWORD[36+rsp]
        and     edi,eax
        xor     eax,ebx
        ror     ebp,7
        movdqa  xmm10,xmm2
        mov     esi,edx
        xor     edi,eax
        movdqa  XMMWORD[16+rsp],xmm8
        rol     edx,5
        add     ecx,edi
        xor     esi,ebp
        pslld   xmm2,2
        xor     ebp,eax
        add     ecx,edx
        psrld   xmm10,30
        add     ebx,DWORD[40+rsp]
        and     esi,ebp
        xor     ebp,eax
        por     xmm2,xmm10
        ror     edx,7
        mov     edi,ecx
        xor     esi,ebp
        rol     ecx,5
        pshufd  xmm8,xmm1,238
        add     ebx,esi
        xor     edi,edx
        xor     edx,ebp
        add     ebx,ecx
        add     eax,DWORD[44+rsp]
        and     edi,edx
        xor     edx,ebp
        ror     ecx,7
        mov     esi,ebx
        xor     edi,edx
        rol     ebx,5
        add     eax,edi
        xor     esi,edx
        add     eax,ebx
        pxor    xmm3,xmm7
        add     ebp,DWORD[48+rsp]
        xor     esi,ecx
        punpcklqdq      xmm8,xmm2
        mov     edi,eax
        rol     eax,5
        pxor    xmm3,xmm4
        add     ebp,esi
        xor     edi,ecx
        movdqa  xmm10,xmm9
        ror     ebx,7
        paddd   xmm9,xmm2
        add     ebp,eax
        pxor    xmm3,xmm8
        add     edx,DWORD[52+rsp]
        xor     edi,ebx
        mov     esi,ebp
        rol     ebp,5
        movdqa  xmm8,xmm3
        add     edx,edi
        xor     esi,ebx
        movdqa  XMMWORD[32+rsp],xmm9
        ror     eax,7
        add     edx,ebp
        add     ecx,DWORD[56+rsp]
        pslld   xmm3,2
        xor     esi,eax
        mov     edi,edx
        psrld   xmm8,30
        rol     edx,5
        add     ecx,esi
        xor     edi,eax
        ror     ebp,7
        por     xmm3,xmm8
        add     ecx,edx
        add     ebx,DWORD[60+rsp]
        xor     edi,ebp
        mov     esi,ecx
        rol     ecx,5
        add     ebx,edi
        xor     esi,ebp
        ror     edx,7
        add     ebx,ecx
        add     eax,DWORD[rsp]
        xor     esi,edx
        mov     edi,ebx
        rol     ebx,5
        paddd   xmm10,xmm3
        add     eax,esi
        xor     edi,edx
        movdqa  XMMWORD[48+rsp],xmm10
        ror     ecx,7
        add     eax,ebx
        add     ebp,DWORD[4+rsp]
        xor     edi,ecx
        mov     esi,eax
        rol     eax,5
        add     ebp,edi
        xor     esi,ecx
        ror     ebx,7
        add     ebp,eax
        add     edx,DWORD[8+rsp]
        xor     esi,ebx
        mov     edi,ebp
        rol     ebp,5
        add     edx,esi
        xor     edi,ebx
        ror     eax,7
        add     edx,ebp
        add     ecx,DWORD[12+rsp]
        xor     edi,eax
        mov     esi,edx
        rol     edx,5
        add     ecx,edi
        xor     esi,eax
        ror     ebp,7
        add     ecx,edx
        cmp     r9,r10
        je      NEAR $L$done_ssse3
        movdqa  xmm6,XMMWORD[64+r14]
        movdqa  xmm9,XMMWORD[((-64))+r14]
        movdqu  xmm0,XMMWORD[r9]
        movdqu  xmm1,XMMWORD[16+r9]
        movdqu  xmm2,XMMWORD[32+r9]
        movdqu  xmm3,XMMWORD[48+r9]
DB      102,15,56,0,198
        add     r9,64
        add     ebx,DWORD[16+rsp]
        xor     esi,ebp
        mov     edi,ecx
DB      102,15,56,0,206
        rol     ecx,5
        add     ebx,esi
        xor     edi,ebp
        ror     edx,7
        paddd   xmm0,xmm9
        add     ebx,ecx
        add     eax,DWORD[20+rsp]
        xor     edi,edx
        mov     esi,ebx
        movdqa  XMMWORD[rsp],xmm0
        rol     ebx,5
        add     eax,edi
        xor     esi,edx
        ror     ecx,7
        psubd   xmm0,xmm9
        add     eax,ebx
        add     ebp,DWORD[24+rsp]
        xor     esi,ecx
        mov     edi,eax
        rol     eax,5
        add     ebp,esi
        xor     edi,ecx
        ror     ebx,7
        add     ebp,eax
        add     edx,DWORD[28+rsp]
        xor     edi,ebx
        mov     esi,ebp
        rol     ebp,5
        add     edx,edi
        xor     esi,ebx
        ror     eax,7
        add     edx,ebp
        add     ecx,DWORD[32+rsp]
        xor     esi,eax
        mov     edi,edx
DB      102,15,56,0,214
        rol     edx,5
        add     ecx,esi
        xor     edi,eax
        ror     ebp,7
        paddd   xmm1,xmm9
        add     ecx,edx
        add     ebx,DWORD[36+rsp]
        xor     edi,ebp
        mov     esi,ecx
        movdqa  XMMWORD[16+rsp],xmm1
        rol     ecx,5
        add     ebx,edi
        xor     esi,ebp
        ror     edx,7
        psubd   xmm1,xmm9
        add     ebx,ecx
        add     eax,DWORD[40+rsp]
        xor     esi,edx
        mov     edi,ebx
        rol     ebx,5
        add     eax,esi
        xor     edi,edx
        ror     ecx,7
        add     eax,ebx
        add     ebp,DWORD[44+rsp]
        xor     edi,ecx
        mov     esi,eax
        rol     eax,5
        add     ebp,edi
        xor     esi,ecx
        ror     ebx,7
        add     ebp,eax
        add     edx,DWORD[48+rsp]
        xor     esi,ebx
        mov     edi,ebp
DB      102,15,56,0,222
        rol     ebp,5
        add     edx,esi
        xor     edi,ebx
        ror     eax,7
        paddd   xmm2,xmm9
        add     edx,ebp
        add     ecx,DWORD[52+rsp]
        xor     edi,eax
        mov     esi,edx
        movdqa  XMMWORD[32+rsp],xmm2
        rol     edx,5
        add     ecx,edi
        xor     esi,eax
        ror     ebp,7
        psubd   xmm2,xmm9
        add     ecx,edx
        add     ebx,DWORD[56+rsp]
        xor     esi,ebp
        mov     edi,ecx
        rol     ecx,5
        add     ebx,esi
        xor     edi,ebp
        ror     edx,7
        add     ebx,ecx
        add     eax,DWORD[60+rsp]
        xor     edi,edx
        mov     esi,ebx
        rol     ebx,5
        add     eax,edi
        ror     ecx,7
        add     eax,ebx
        add     eax,DWORD[r8]
        add     esi,DWORD[4+r8]
        add     ecx,DWORD[8+r8]
        add     edx,DWORD[12+r8]
        mov     DWORD[r8],eax
        add     ebp,DWORD[16+r8]
        mov     DWORD[4+r8],esi
        mov     ebx,esi
        mov     DWORD[8+r8],ecx
        mov     edi,ecx
        mov     DWORD[12+r8],edx
        xor     edi,edx
        mov     DWORD[16+r8],ebp
        and     esi,edi
        jmp     NEAR $L$oop_ssse3

ALIGN   16
;-----------------------------------------------------------------------
; $L$done_ssse3 -- exit path of the SSSE3 loop.
;
; Reached from the loop's `cmp r9,r10` / `je` when r9 == r10. The
; fall-through path interleaves these same scalar steps with loads of the
; next 64 input bytes; here no further input is touched.
; NOTE(review): r10 is presumably the end-of-input pointer (the AVX
; set-up code later in this file computes it as r9 + count*64); the
; SSSE3 prologue is outside this excerpt -- confirm.
;
; Work done:
;   1. Twelve scalar steps consuming the dwords at 16+rsp .. 60+rsp.
;      Each step adds, into the next accumulator register, one stack
;      dword, a three-way XOR of other working registers (built up in
;      esi or edi, which alternate), and the previous result rotated
;      left by 5. The `rol x,5` is done in place; the `ror x,7` issued
;      one step later leaves x rotated right by 2 overall (= rol 30).
;      NOTE(review): this matches the XOR ("parity") rounds that close a
;      SHA-1 block, with the stack slots holding schedule words that
;      already include the round constant -- confirm against the
;      generator source.
;   2. The five working values are added to the five dwords at [r8]
;      and the sums are stored back.
;   3. xmm6-xmm11, r14, r13, r12, rbp, rbx and rsp are reloaded from the
;      frame addressed by r11, then rdi/rsi from [8+rsp]/[16+rsp]. All
;      of these are callee-saved under the Microsoft x64 convention.
;      r11 must therefore hold the stack pointer as it was on function
;      entry (the AVX code below sets it up the same way).
;-----------------------------------------------------------------------
$L$done_ssse3:
        ; step 1: ebx += [16+rsp] + (xor temp in esi) + rol(ecx,5)
        add     ebx,DWORD[16+rsp]
        xor     esi,ebp
        mov     edi,ecx
        rol     ecx,5
        add     ebx,esi
        xor     edi,ebp
        ror     edx,7
        add     ebx,ecx
        ; step 2: eax += [20+rsp] + (xor temp in edi) + rol(ebx,5)
        add     eax,DWORD[20+rsp]
        xor     edi,edx
        mov     esi,ebx
        rol     ebx,5
        add     eax,edi
        xor     esi,edx
        ror     ecx,7
        add     eax,ebx
        ; step 3: ebp += [24+rsp] + (xor temp in esi) + rol(eax,5)
        add     ebp,DWORD[24+rsp]
        xor     esi,ecx
        mov     edi,eax
        rol     eax,5
        add     ebp,esi
        xor     edi,ecx
        ror     ebx,7
        add     ebp,eax
        ; step 4: edx += [28+rsp] + (xor temp in edi) + rol(ebp,5)
        add     edx,DWORD[28+rsp]
        xor     edi,ebx
        mov     esi,ebp
        rol     ebp,5
        add     edx,edi
        xor     esi,ebx
        ror     eax,7
        add     edx,ebp
        ; step 5: ecx += [32+rsp] + (xor temp in esi) + rol(edx,5)
        add     ecx,DWORD[32+rsp]
        xor     esi,eax
        mov     edi,edx
        rol     edx,5
        add     ecx,esi
        xor     edi,eax
        ror     ebp,7
        add     ecx,edx
        ; step 6: ebx += [36+rsp] + (xor temp in edi) + rol(ecx,5)
        add     ebx,DWORD[36+rsp]
        xor     edi,ebp
        mov     esi,ecx
        rol     ecx,5
        add     ebx,edi
        xor     esi,ebp
        ror     edx,7
        add     ebx,ecx
        ; step 7: eax += [40+rsp] + (xor temp in esi) + rol(ebx,5)
        add     eax,DWORD[40+rsp]
        xor     esi,edx
        mov     edi,ebx
        rol     ebx,5
        add     eax,esi
        xor     edi,edx
        ror     ecx,7
        add     eax,ebx
        ; step 8: ebp += [44+rsp] + (xor temp in edi) + rol(eax,5)
        add     ebp,DWORD[44+rsp]
        xor     edi,ecx
        mov     esi,eax
        rol     eax,5
        add     ebp,edi
        xor     esi,ecx
        ror     ebx,7
        add     ebp,eax
        ; step 9: edx += [48+rsp] + (xor temp in esi) + rol(ebp,5)
        add     edx,DWORD[48+rsp]
        xor     esi,ebx
        mov     edi,ebp
        rol     ebp,5
        add     edx,esi
        xor     edi,ebx
        ror     eax,7
        add     edx,ebp
        ; step 10: ecx += [52+rsp] + (xor temp in edi) + rol(edx,5)
        add     ecx,DWORD[52+rsp]
        xor     edi,eax
        mov     esi,edx
        rol     edx,5
        add     ecx,edi
        xor     esi,eax
        ror     ebp,7
        add     ecx,edx
        ; step 11: ebx += [56+rsp] + (xor temp in esi) + rol(ecx,5)
        add     ebx,DWORD[56+rsp]
        xor     esi,ebp
        mov     edi,ecx
        rol     ecx,5
        add     ebx,esi
        xor     edi,ebp
        ror     edx,7
        add     ebx,ecx
        ; step 12: eax += [60+rsp] + (xor temp in edi) + rol(ebx,5).
        ; esi takes a copy of ebx *before* the in-place rol, so esi (not
        ; ebx) is the value folded into the second state word below.
        add     eax,DWORD[60+rsp]
        xor     edi,edx
        mov     esi,ebx
        rol     ebx,5
        add     eax,edi
        ror     ecx,7
        add     eax,ebx
        ; Fold the working values eax, esi, ecx, edx, ebp into the five
        ; dwords at [r8] .. [16+r8] and write the sums back (adds and
        ; stores are interleaved).
        add     eax,DWORD[r8]
        add     esi,DWORD[4+r8]
        add     ecx,DWORD[8+r8]
        mov     DWORD[r8],eax
        add     edx,DWORD[12+r8]
        mov     DWORD[4+r8],esi
        add     ebp,DWORD[16+r8]
        mov     DWORD[8+r8],ecx
        mov     DWORD[12+r8],edx
        mov     DWORD[16+r8],ebp
        ; Reload the non-volatile vector registers from the 96-byte area
        ; at [r11-136, r11-40). movaps requires these addresses to be
        ; 16-byte aligned.
        movaps  xmm6,XMMWORD[((-40-96))+r11]
        movaps  xmm7,XMMWORD[((-40-80))+r11]
        movaps  xmm8,XMMWORD[((-40-64))+r11]
        movaps  xmm9,XMMWORD[((-40-48))+r11]
        movaps  xmm10,XMMWORD[((-40-32))+r11]
        movaps  xmm11,XMMWORD[((-40-16))+r11]
        ; Reload the non-volatile integer registers from the five
        ; 8-byte slots just below r11, then drop the whole frame.
        mov     r14,QWORD[((-40))+r11]

        mov     r13,QWORD[((-32))+r11]

        mov     r12,QWORD[((-24))+r11]

        mov     rbp,QWORD[((-16))+r11]

        mov     rbx,QWORD[((-8))+r11]

        lea     rsp,[r11]

$L$epilogue_ssse3:
        ; rsp now points at the return address; [8+rsp] and [16+rsp] are
        ; the first two caller-provided home slots above it.
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        ; Bytes F3 C3 encode `rep ret` (a two-byte return).
        DB      0F3h,0C3h               ;repret

$L$SEH_end_sha1_block_data_order_ssse3:

ALIGN   16
;-----------------------------------------------------------------------
; sha1_block_data_order_avx -- AVX entry point, frame set-up and
; first-block preload (everything up to the jump into $L$oop_avx).
;
; ABI:   Microsoft x64 at the public label: arguments arrive in rcx,
;        rdx, r8 and are moved to rdi, rsi, rdx.
; In (after the move):
;        rdi = pointer to five 32-bit state words (read at [r8]..[16+r8])
;        rsi = input pointer, consumed 64 bytes at a time
;        rdx = count; it is multiplied by 64 and added to the input
;              pointer to form the end pointer kept in r10
; Alt entry: `_avx_shortcut` is the target of the feature dispatch in
;        sha1_block_data_order, which has already saved rdi/rsi and
;        moved the arguments before jumping here.
;
; Frame (r11 = rsp at entry):
;        [r11+8], [r11+16]   caller home slots holding the saved rdi, rsi
;        [r11-40, r11)       pushed rbx, rbp, r12, r13, r14
;        [r11-136, r11-40)   saved xmm6 .. xmm11 (6 x 16 bytes)
;        [rsp, rsp+64)       64-byte-aligned scratch for four 16-byte
;                            vectors of (input words + constant)
;
; Registers on exit to $L$oop_avx:
;        r8 = state pointer, r9 = next input block, r10 = end pointer,
;        r14 = K_XX_XX+64, eax/ebx/ecx/edx/ebp = the five state words,
;        esi = ebx & (ecx ^ edx), xmm0-xmm3 = shuffled input block,
;        xmm11 = vector loaded from K_XX_XX+0.
;-----------------------------------------------------------------------
sha1_block_data_order_avx:
        ; Stash the Win64 non-volatile rdi/rsi in the first two home
        ; slots above the return address so they can be reused below.
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_sha1_block_data_order_avx:
        ; Move the Win64 argument registers into rdi, rsi, rdx.
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8


_avx_shortcut:

        ; r11 anchors the frame: every save slot below, and the matching
        ; restore code, is addressed relative to the entry rsp.
        mov     r11,rsp

        push    rbx

        push    rbp

        push    r12

        push    r13

        push    r14

        ; Reserve 160 bytes below the five pushes: 96 for xmm6-xmm11
        ; plus 64 for the scratch buffer.
        lea     rsp,[((-160))+rsp]
        ; Clear the upper halves of the ymm registers before running
        ; VEX-encoded 128-bit code.
        vzeroupper
        ; xmm6-xmm11 are non-volatile under Win64. vmovaps needs aligned
        ; addresses: r11-136 is a multiple of 16 given the usual entry
        ; alignment rsp = 8 (mod 16).
        vmovaps XMMWORD[(-40-96)+r11],xmm6
        vmovaps XMMWORD[(-40-80)+r11],xmm7
        vmovaps XMMWORD[(-40-64)+r11],xmm8
        vmovaps XMMWORD[(-40-48)+r11],xmm9
        vmovaps XMMWORD[(-40-32)+r11],xmm10
        vmovaps XMMWORD[(-40-16)+r11],xmm11
$L$prologue_avx:
        ; Round rsp down to a 64-byte boundary. rsp was r11-200, so the
        ; 64-byte scratch [rsp, rsp+64) stays below the xmm save area.
        and     rsp,-64
        mov     r8,rdi
        mov     r9,rsi
        mov     r10,rdx

        ; r10 = input + count*64: the end pointer that the loop compares
        ; against r9 to decide when to stop.
        shl     r10,6
        add     r10,r9
        ; r14 points 64 bytes into K_XX_XX so the table can be reached
        ; with displacements from -64 to +64.
        ; NOTE(review): K_XX_XX is defined outside this excerpt;
        ; presumably it holds the SHA-1 round constants followed by a
        ; byte-swap shuffle mask -- confirm.
        lea     r14,[((K_XX_XX+64))]

        ; Load the five state words. esi is primed with
        ; ebx & (ecx ^ edx); the first loop step XORs edx into it.
        mov     eax,DWORD[r8]
        mov     ebx,DWORD[4+r8]
        mov     ecx,DWORD[8+r8]
        mov     edx,DWORD[12+r8]
        mov     esi,ebx
        mov     ebp,DWORD[16+r8]
        mov     edi,ecx
        xor     edi,edx
        and     esi,edi

        ; xmm6 = shuffle mask from K_XX_XX+128, xmm11 = vector from
        ; K_XX_XX+0. The input itself may be unaligned, hence vmovdqu.
        vmovdqa xmm6,XMMWORD[64+r14]
        vmovdqa xmm11,XMMWORD[((-64))+r14]
        vmovdqu xmm0,XMMWORD[r9]
        vmovdqu xmm1,XMMWORD[16+r9]
        vmovdqu xmm2,XMMWORD[32+r9]
        vmovdqu xmm3,XMMWORD[48+r9]
        vpshufb xmm0,xmm0,xmm6
        add     r9,64
        vpshufb xmm1,xmm1,xmm6
        vpshufb xmm2,xmm2,xmm6
        vpshufb xmm3,xmm3,xmm6
        ; Pre-add xmm11 to the first three input vectors and park the
        ; sums in the scratch buffer. xmm6 is reused as a destination
        ; here, after its last use as the shuffle mask. The fourth
        ; vector (xmm3) is summed and stored to [48+rsp] inside the loop.
        vpaddd  xmm4,xmm0,xmm11
        vpaddd  xmm5,xmm1,xmm11
        vpaddd  xmm6,xmm2,xmm11
        vmovdqa XMMWORD[rsp],xmm4
        vmovdqa XMMWORD[16+rsp],xmm5
        vmovdqa XMMWORD[32+rsp],xmm6
        ; Jump over the alignment padding that precedes the loop label.
        jmp     NEAR $L$oop_avx
ALIGN   16
$L$oop_avx:
        shrd    ebx,ebx,2
        xor     esi,edx
        vpalignr        xmm4,xmm1,xmm0,8
        mov     edi,eax
        add     ebp,DWORD[rsp]
        vpaddd  xmm9,xmm11,xmm3
        xor     ebx,ecx
        shld    eax,eax,5
        vpsrldq xmm8,xmm3,4
        add     ebp,esi
        and     edi,ebx
        vpxor   xmm4,xmm4,xmm0
        xor     ebx,ecx
        add     ebp,eax
        vpxor   xmm8,xmm8,xmm2
        shrd    eax,eax,7
        xor     edi,ecx
        mov     esi,ebp
        add     edx,DWORD[4+rsp]
        vpxor   xmm4,xmm4,xmm8
        xor     eax,ebx
        shld    ebp,ebp,5
        vmovdqa XMMWORD[48+rsp],xmm9
        add     edx,edi
        and     esi,eax
        vpsrld  xmm8,xmm4,31
        xor     eax,ebx
        add     edx,ebp
        shrd    ebp,ebp,7
        xor     esi,ebx
        vpslldq xmm10,xmm4,12
        vpaddd  xmm4,xmm4,xmm4
        mov     edi,edx
        add     ecx,DWORD[8+rsp]
        xor     ebp,eax
        shld    edx,edx,5
        vpsrld  xmm9,xmm10,30
        vpor    xmm4,xmm4,xmm8
        add     ecx,esi
        and     edi,ebp
        xor     ebp,eax
        add     ecx,edx
        vpslld  xmm10,xmm10,2
        vpxor   xmm4,xmm4,xmm9
        shrd    edx,edx,7
        xor     edi,eax
        mov     esi,ecx
        add     ebx,DWORD[12+rsp]
        vpxor   xmm4,xmm4,xmm10
        xor     edx,ebp
        shld    ecx,ecx,5
        add     ebx,edi
        and     esi,edx
        xor     edx,ebp
        add     ebx,ecx
        shrd    ecx,ecx,7
        xor     esi,ebp
        vpalignr        xmm5,xmm2,xmm1,8
        mov     edi,ebx
        add     eax,DWORD[16+rsp]
        vpaddd  xmm9,xmm11,xmm4
        xor     ecx,edx
        shld    ebx,ebx,5
        vpsrldq xmm8,xmm4,4
        add     eax,esi
        and     edi,ecx
        vpxor   xmm5,xmm5,xmm1
        xor     ecx,edx
        add     eax,ebx
        vpxor   xmm8,xmm8,xmm3
        shrd    ebx,ebx,7
        xor     edi,edx
        mov     esi,eax
        add     ebp,DWORD[20+rsp]
        vpxor   xmm5,xmm5,xmm8
        xor     ebx,ecx
        shld    eax,eax,5
        vmovdqa XMMWORD[rsp],xmm9
        add     ebp,edi
        and     esi,ebx
        vpsrld  xmm8,xmm5,31
        xor     ebx,ecx
        add     ebp,eax
        shrd    eax,eax,7
        xor     esi,ecx
        vpslldq xmm10,xmm5,12
        vpaddd  xmm5,xmm5,xmm5
        mov     edi,ebp
        add     edx,DWORD[24+rsp]
        xor     eax,ebx
        shld    ebp,ebp,5
        vpsrld  xmm9,xmm10,30
        vpor    xmm5,xmm5,xmm8
        add     edx,esi
        and     edi,eax
        xor     eax,ebx
        add     edx,ebp
        vpslld  xmm10,xmm10,2
        vpxor   xmm5,xmm5,xmm9
        shrd    ebp,ebp,7
        xor     edi,ebx
        mov     esi,edx
        add     ecx,DWORD[28+rsp]
        vpxor   xmm5,xmm5,xmm10
        xor     ebp,eax
        shld    edx,edx,5
        vmovdqa xmm11,XMMWORD[((-32))+r14]
        add     ecx,edi
        and     esi,ebp
        xor     ebp,eax
        add     ecx,edx
        shrd    edx,edx,7
        xor     esi,eax
        vpalignr        xmm6,xmm3,xmm2,8
        mov     edi,ecx
        add     ebx,DWORD[32+rsp]
        vpaddd  xmm9,xmm11,xmm5
        xor     edx,ebp
        shld    ecx,ecx,5
        vpsrldq xmm8,xmm5,4
        add     ebx,esi
        and     edi,edx
        vpxor   xmm6,xmm6,xmm2
        xor     edx,ebp
        add     ebx,ecx
        vpxor   xmm8,xmm8,xmm4
        shrd    ecx,ecx,7
        xor     edi,ebp
        mov     esi,ebx
        add     eax,DWORD[36+rsp]
        vpxor   xmm6,xmm6,xmm8
        xor     ecx,edx
        shld    ebx,ebx,5
        vmovdqa XMMWORD[16+rsp],xmm9
        add     eax,edi
        and     esi,ecx
        vpsrld  xmm8,xmm6,31
        xor     ecx,edx
        add     eax,ebx
        shrd    ebx,ebx,7
        xor     esi,edx
        vpslldq xmm10,xmm6,12
        vpaddd  xmm6,xmm6,xmm6
        mov     edi,eax
        add     ebp,DWORD[40+rsp]
        xor     ebx,ecx
        shld    eax,eax,5
        vpsrld  xmm9,xmm10,30
        vpor    xmm6,xmm6,xmm8
        add     ebp,esi
        and     edi,ebx
        xor     ebx,ecx
        add     ebp,eax
        vpslld  xmm10,xmm10,2
        vpxor   xmm6,xmm6,xmm9
        shrd    eax,eax,7
        xor     edi,ecx
        mov     esi,ebp
        add     edx,DWORD[44+rsp]
        vpxor   xmm6,xmm6,xmm10
        xor     eax,ebx
        shld    ebp,ebp,5
        add     edx,edi
        and     esi,eax
        xor     eax,ebx
        add     edx,ebp
        shrd    ebp,ebp,7
        xor     esi,ebx
        vpalignr        xmm7,xmm4,xmm3,8
        mov     edi,edx
        add     ecx,DWORD[48+rsp]
        vpaddd  xmm9,xmm11,xmm6
        xor     ebp,eax
        shld    edx,edx,5
        vpsrldq xmm8,xmm6,4
        add     ecx,esi
        and     edi,ebp
        vpxor   xmm7,xmm7,xmm3
        xor     ebp,eax
        add     ecx,edx
        vpxor   xmm8,xmm8,xmm5
        shrd    edx,edx,7
        xor     edi,eax
        mov     esi,ecx
        add     ebx,DWORD[52+rsp]
        vpxor   xmm7,xmm7,xmm8
        xor     edx,ebp
        shld    ecx,ecx,5
        vmovdqa XMMWORD[32+rsp],xmm9
        add     ebx,edi
        and     esi,edx
        vpsrld  xmm8,xmm7,31
        xor     edx,ebp
        add     ebx,ecx
        shrd    ecx,ecx,7
        xor     esi,ebp
        vpslldq xmm10,xmm7,12
        vpaddd  xmm7,xmm7,xmm7
        mov     edi,ebx
        add     eax,DWORD[56+rsp]
        xor     ecx,edx
        shld    ebx,ebx,5
        vpsrld  xmm9,xmm10,30
        vpor    xmm7,xmm7,xmm8
        add     eax,esi
        and     edi,ecx
        xor     ecx,edx
        add     eax,ebx
        vpslld  xmm10,xmm10,2
        vpxor   xmm7,xmm7,xmm9
        shrd    ebx,ebx,7
        xor     edi,edx
        mov     esi,eax
        add     ebp,DWORD[60+rsp]
        vpxor   xmm7,xmm7,xmm10
        xor     ebx,ecx
        shld    eax,eax,5
        add     ebp,edi
        and     esi,ebx
        xor     ebx,ecx
        add     ebp,eax
        vpalignr        xmm8,xmm7,xmm6,8
        vpxor   xmm0,xmm0,xmm4
        shrd    eax,eax,7
        xor     esi,ecx
        mov     edi,ebp
        add     edx,DWORD[rsp]
        vpxor   xmm0,xmm0,xmm1
        xor     eax,ebx
        shld    ebp,ebp,5
        vpaddd  xmm9,xmm11,xmm7
        add     edx,esi
        and     edi,eax
        vpxor   xmm0,xmm0,xmm8
        xor     eax,ebx
        add     edx,ebp
        shrd    ebp,ebp,7
        xor     edi,ebx
        vpsrld  xmm8,xmm0,30
        vmovdqa XMMWORD[48+rsp],xmm9
        mov     esi,edx
        add     ecx,DWORD[4+rsp]
        xor     ebp,eax
        shld    edx,edx,5
        vpslld  xmm0,xmm0,2
        add     ecx,edi
        and     esi,ebp
        xor     ebp,eax
        add     ecx,edx
        shrd    edx,edx,7
        xor     esi,eax
        mov     edi,ecx
        add     ebx,DWORD[8+rsp]
        vpor    xmm0,xmm0,xmm8
        xor     edx,ebp
        shld    ecx,ecx,5
        add     ebx,esi
        and     edi,edx
        xor     edx,ebp
        add     ebx,ecx
        add     eax,DWORD[12+rsp]
        xor     edi,ebp
        mov     esi,ebx
        shld    ebx,ebx,5
        add     eax,edi
        xor     esi,edx
        shrd    ecx,ecx,7
        add     eax,ebx
        vpalignr        xmm8,xmm0,xmm7,8
        vpxor   xmm1,xmm1,xmm5
        add     ebp,DWORD[16+rsp]
        xor     esi,ecx
        mov     edi,eax
        shld    eax,eax,5
        vpxor   xmm1,xmm1,xmm2
        add     ebp,esi
        xor     edi,ecx
        vpaddd  xmm9,xmm11,xmm0
        shrd    ebx,ebx,7
        add     ebp,eax
        vpxor   xmm1,xmm1,xmm8
        add     edx,DWORD[20+rsp]
        xor     edi,ebx
        mov     esi,ebp
        shld    ebp,ebp,5
        vpsrld  xmm8,xmm1,30
        vmovdqa XMMWORD[rsp],xmm9
        add     edx,edi
        xor     esi,ebx
        shrd    eax,eax,7
        add     edx,ebp
        vpslld  xmm1,xmm1,2
        add     ecx,DWORD[24+rsp]
        xor     esi,eax
        mov     edi,edx
        shld    edx,edx,5
        add     ecx,esi
        xor     edi,eax
        shrd    ebp,ebp,7
        add     ecx,edx
        vpor    xmm1,xmm1,xmm8
        add     ebx,DWORD[28+rsp]
        xor     edi,ebp
        mov     esi,ecx
        shld    ecx,ecx,5
        add     ebx,edi
        xor     esi,ebp
        shrd    edx,edx,7
        add     ebx,ecx
        vpalignr        xmm8,xmm1,xmm0,8
        vpxor   xmm2,xmm2,xmm6
        add     eax,DWORD[32+rsp]
        xor     esi,edx
        mov     edi,ebx
        shld    ebx,ebx,5
        vpxor   xmm2,xmm2,xmm3
        add     eax,esi
        xor     edi,edx
        vpaddd  xmm9,xmm11,xmm1
        vmovdqa xmm11,XMMWORD[r14]
        shrd    ecx,ecx,7
        add     eax,ebx
        vpxor   xmm2,xmm2,xmm8
        add     ebp,DWORD[36+rsp]
        xor     edi,ecx
        mov     esi,eax
        shld    eax,eax,5
        vpsrld  xmm8,xmm2,30
        vmovdqa XMMWORD[16+rsp],xmm9
        add     ebp,edi
        xor     esi,ecx
        shrd    ebx,ebx,7
        add     ebp,eax
        vpslld  xmm2,xmm2,2
        add     edx,DWORD[40+rsp]
        xor     esi,ebx
        mov     edi,ebp
        shld    ebp,ebp,5
        add     edx,esi
        xor     edi,ebx
        shrd    eax,eax,7
        add     edx,ebp
        vpor    xmm2,xmm2,xmm8
        add     ecx,DWORD[44+rsp]
        xor     edi,eax
        mov     esi,edx
        shld    edx,edx,5
        add     ecx,edi
        xor     esi,eax
        shrd    ebp,ebp,7
        add     ecx,edx
        vpalignr        xmm8,xmm2,xmm1,8
        vpxor   xmm3,xmm3,xmm7
        add     ebx,DWORD[48+rsp]
        xor     esi,ebp
        mov     edi,ecx
        shld    ecx,ecx,5
        vpxor   xmm3,xmm3,xmm4
        add     ebx,esi
        xor     edi,ebp
        vpaddd  xmm9,xmm11,xmm2
        shrd    edx,edx,7
        add     ebx,ecx
        vpxor   xmm3,xmm3,xmm8
        add     eax,DWORD[52+rsp]
        xor     edi,edx
        mov     esi,ebx
        shld    ebx,ebx,5
        vpsrld  xmm8,xmm3,30
        vmovdqa XMMWORD[32+rsp],xmm9
        add     eax,edi
        xor     esi,edx
        shrd    ecx,ecx,7
        add     eax,ebx
        vpslld  xmm3,xmm3,2
        add     ebp,DWORD[56+rsp]
        xor     esi,ecx
        mov     edi,eax
        shld    eax,eax,5
        add     ebp,esi
        xor     edi,ecx
        shrd    ebx,ebx,7
        add     ebp,eax
        vpor    xmm3,xmm3,xmm8
        add     edx,DWORD[60+rsp]
        xor     edi,ebx
        mov     esi,ebp
        shld    ebp,ebp,5
        add     edx,edi
        xor     esi,ebx
        shrd    eax,eax,7
        add     edx,ebp
        vpalignr        xmm8,xmm3,xmm2,8
        vpxor   xmm4,xmm4,xmm0
        add     ecx,DWORD[rsp]
        xor     esi,eax
        mov     edi,edx
        shld    edx,edx,5
        vpxor   xmm4,xmm4,xmm5
        add     ecx,esi
        xor     edi,eax
        vpaddd  xmm9,xmm11,xmm3
        shrd    ebp,ebp,7
        add     ecx,edx
        vpxor   xmm4,xmm4,xmm8
        add     ebx,DWORD[4+rsp]
        xor     edi,ebp
        mov     esi,ecx
        shld    ecx,ecx,5
        vpsrld  xmm8,xmm4,30
        vmovdqa XMMWORD[48+rsp],xmm9
        add     ebx,edi
        xor     esi,ebp
        shrd    edx,edx,7
        add     ebx,ecx
        vpslld  xmm4,xmm4,2
        add     eax,DWORD[8+rsp]
        xor     esi,edx
        mov     edi,ebx
        shld    ebx,ebx,5
        add     eax,esi
        xor     edi,edx
        shrd    ecx,ecx,7
        add     eax,ebx
        vpor    xmm4,xmm4,xmm8
        add     ebp,DWORD[12+rsp]
        xor     edi,ecx
        mov     esi,eax
        shld    eax,eax,5
        add     ebp,edi
        xor     esi,ecx
        shrd    ebx,ebx,7
        add     ebp,eax
        vpalignr        xmm8,xmm4,xmm3,8
        vpxor   xmm5,xmm5,xmm1
        add     edx,DWORD[16+rsp]
        xor     esi,ebx
        mov     edi,ebp
        shld    ebp,ebp,5
        vpxor   xmm5,xmm5,xmm6
        add     edx,esi
        xor     edi,ebx
        vpaddd  xmm9,xmm11,xmm4
        shrd    eax,eax,7
        add     edx,ebp
        vpxor   xmm5,xmm5,xmm8
        add     ecx,DWORD[20+rsp]
        xor     edi,eax
        mov     esi,edx
        shld    edx,edx,5
        vpsrld  xmm8,xmm5,30
        vmovdqa XMMWORD[rsp],xmm9
        add     ecx,edi
        xor     esi,eax
        shrd    ebp,ebp,7
        add     ecx,edx
        vpslld  xmm5,xmm5,2
        add     ebx,DWORD[24+rsp]
        xor     esi,ebp
        mov     edi,ecx
        shld    ecx,ecx,5
        add     ebx,esi
        xor     edi,ebp
        shrd    edx,edx,7
        add     ebx,ecx
        vpor    xmm5,xmm5,xmm8
        add     eax,DWORD[28+rsp]
        shrd    ecx,ecx,7
        mov     esi,ebx
        xor     edi,edx
        shld    ebx,ebx,5
        add     eax,edi
        xor     esi,ecx
        xor     ecx,edx
        add     eax,ebx
        vpalignr        xmm8,xmm5,xmm4,8
        vpxor   xmm6,xmm6,xmm2
        add     ebp,DWORD[32+rsp]
        and     esi,ecx
        xor     ecx,edx
        shrd    ebx,ebx,7
        vpxor   xmm6,xmm6,xmm7
        mov     edi,eax
        xor     esi,ecx
        vpaddd  xmm9,xmm11,xmm5
        shld    eax,eax,5
        add     ebp,esi
        vpxor   xmm6,xmm6,xmm8
        xor     edi,ebx
        xor     ebx,ecx
        add     ebp,eax
        add     edx,DWORD[36+rsp]
        vpsrld  xmm8,xmm6,30
        vmovdqa XMMWORD[16+rsp],xmm9
        and     edi,ebx
        xor     ebx,ecx
        shrd    eax,eax,7
        mov     esi,ebp
        vpslld  xmm6,xmm6,2
        xor     edi,ebx
        shld    ebp,ebp,5
        add     edx,edi
        xor     esi,eax
        xor     eax,ebx
        add     edx,ebp
        add     ecx,DWORD[40+rsp]
        and     esi,eax
        vpor    xmm6,xmm6,xmm8
        xor     eax,ebx
        shrd    ebp,ebp,7
        mov     edi,edx
        xor     esi,eax
        shld    edx,edx,5
        add     ecx,esi
        xor     edi,ebp
        xor     ebp,eax
        add     ecx,edx
        add     ebx,DWORD[44+rsp]
        and     edi,ebp
        xor     ebp,eax
        shrd    edx,edx,7
        mov     esi,ecx
        xor     edi,ebp
        shld    ecx,ecx,5
        add     ebx,edi
        xor     esi,edx
        xor     edx,ebp
        add     ebx,ecx
        vpalignr        xmm8,xmm6,xmm5,8
        vpxor   xmm7,xmm7,xmm3
        add     eax,DWORD[48+rsp]
        and     esi,edx
        xor     edx,ebp
        shrd    ecx,ecx,7
        vpxor   xmm7,xmm7,xmm0
        mov     edi,ebx
        xor     esi,edx
        vpaddd  xmm9,xmm11,xmm6
        vmovdqa xmm11,XMMWORD[32+r14]
        shld    ebx,ebx,5
        add     eax,esi
        vpxor   xmm7,xmm7,xmm8
        xor     edi,ecx
        xor     ecx,edx
        add     eax,ebx
        add     ebp,DWORD[52+rsp]
        vpsrld  xmm8,xmm7,30
        vmovdqa XMMWORD[32+rsp],xmm9
        and     edi,ecx
        xor     ecx,edx
        shrd    ebx,ebx,7
        mov     esi,eax
        vpslld  xmm7,xmm7,2
        xor     edi,ecx
        shld    eax,eax,5
        add     ebp,edi
        xor     esi,ebx
        xor     ebx,ecx
        add     ebp,eax
        add     edx,DWORD[56+rsp]
        and     esi,ebx
        vpor    xmm7,xmm7,xmm8
        xor     ebx,ecx
        shrd    eax,eax,7
        mov     edi,ebp
        xor     esi,ebx
        shld    ebp,ebp,5
        add     edx,esi
        xor     edi,eax
        xor     eax,ebx
        add     edx,ebp
        add     ecx,DWORD[60+rsp]
        and     edi,eax
        xor     eax,ebx
        shrd    ebp,ebp,7
        mov     esi,edx
        xor     edi,eax
        shld    edx,edx,5
        add     ecx,edi
        xor     esi,ebp
        xor     ebp,eax
        add     ecx,edx
        vpalignr        xmm8,xmm7,xmm6,8
        vpxor   xmm0,xmm0,xmm4
        add     ebx,DWORD[rsp]
        and     esi,ebp
        xor     ebp,eax
        shrd    edx,edx,7
        vpxor   xmm0,xmm0,xmm1
        mov     edi,ecx
        xor     esi,ebp
        vpaddd  xmm9,xmm11,xmm7
        shld    ecx,ecx,5
        add     ebx,esi
        vpxor   xmm0,xmm0,xmm8
        xor     edi,edx
        xor     edx,ebp
        add     ebx,ecx
        add     eax,DWORD[4+rsp]
        vpsrld  xmm8,xmm0,30
        vmovdqa XMMWORD[48+rsp],xmm9
        and     edi,edx
        xor     edx,ebp
        shrd    ecx,ecx,7
        mov     esi,ebx
        vpslld  xmm0,xmm0,2
        xor     edi,edx
        shld    ebx,ebx,5
        add     eax,edi
        xor     esi,ecx
        xor     ecx,edx
        add     eax,ebx
        add     ebp,DWORD[8+rsp]
        and     esi,ecx
        vpor    xmm0,xmm0,xmm8
        xor     ecx,edx
        shrd    ebx,ebx,7
        mov     edi,eax
        xor     esi,ecx
        shld    eax,eax,5
        add     ebp,esi
        xor     edi,ebx
        xor     ebx,ecx
        add     ebp,eax
        add     edx,DWORD[12+rsp]
        and     edi,ebx
        xor     ebx,ecx
        shrd    eax,eax,7
        mov     esi,ebp
        xor     edi,ebx
        shld    ebp,ebp,5
        add     edx,edi
        xor     esi,eax
        xor     eax,ebx
        add     edx,ebp
        vpalignr        xmm8,xmm0,xmm7,8
        vpxor   xmm1,xmm1,xmm5
        add     ecx,DWORD[16+rsp]
        and     esi,eax
        xor     eax,ebx
        shrd    ebp,ebp,7
        vpxor   xmm1,xmm1,xmm2
        mov     edi,edx
        xor     esi,eax
        vpaddd  xmm9,xmm11,xmm0
        shld    edx,edx,5
        add     ecx,esi
        vpxor   xmm1,xmm1,xmm8
        xor     edi,ebp
        xor     ebp,eax
        add     ecx,edx
        add     ebx,DWORD[20+rsp]
        vpsrld  xmm8,xmm1,30
        vmovdqa XMMWORD[rsp],xmm9
        and     edi,ebp
        xor     ebp,eax
        shrd    edx,edx,7
        mov     esi,ecx
        vpslld  xmm1,xmm1,2
        xor     edi,ebp
        shld    ecx,ecx,5
        add     ebx,edi
        xor     esi,edx
        xor     edx,ebp
        add     ebx,ecx
        add     eax,DWORD[24+rsp]
        and     esi,edx
        vpor    xmm1,xmm1,xmm8
        xor     edx,ebp
        shrd    ecx,ecx,7
        mov     edi,ebx
        xor     esi,edx
        shld    ebx,ebx,5
        add     eax,esi
        xor     edi,ecx
        xor     ecx,edx
        add     eax,ebx
        add     ebp,DWORD[28+rsp]
        and     edi,ecx
        xor     ecx,edx
        shrd    ebx,ebx,7
        mov     esi,eax
        xor     edi,ecx
        shld    eax,eax,5
        add     ebp,edi
        xor     esi,ebx
        xor     ebx,ecx
        add     ebp,eax
        vpalignr        xmm8,xmm1,xmm0,8
        vpxor   xmm2,xmm2,xmm6
        add     edx,DWORD[32+rsp]
        and     esi,ebx
        xor     ebx,ecx
        shrd    eax,eax,7
        vpxor   xmm2,xmm2,xmm3
        mov     edi,ebp
        xor     esi,ebx
        vpaddd  xmm9,xmm11,xmm1
        shld    ebp,ebp,5
        add     edx,esi
        vpxor   xmm2,xmm2,xmm8
        xor     edi,eax
        xor     eax,ebx
        add     edx,ebp
        add     ecx,DWORD[36+rsp]
        vpsrld  xmm8,xmm2,30
        vmovdqa XMMWORD[16+rsp],xmm9
        and     edi,eax
        xor     eax,ebx
        shrd    ebp,ebp,7
        mov     esi,edx
        vpslld  xmm2,xmm2,2
        xor     edi,eax
        shld    edx,edx,5
        add     ecx,edi
        xor     esi,ebp
        xor     ebp,eax
        add     ecx,edx
        add     ebx,DWORD[40+rsp]
        and     esi,ebp
        vpor    xmm2,xmm2,xmm8
        xor     ebp,eax
        shrd    edx,edx,7
        mov     edi,ecx
        xor     esi,ebp
        shld    ecx,ecx,5
        add     ebx,esi
        xor     edi,edx
        xor     edx,ebp
        add     ebx,ecx
        add     eax,DWORD[44+rsp]
        and     edi,edx
        xor     edx,ebp
        shrd    ecx,ecx,7
        mov     esi,ebx
        xor     edi,edx
        shld    ebx,ebx,5
        add     eax,edi
        xor     esi,edx
        add     eax,ebx
        vpalignr        xmm8,xmm2,xmm1,8
        vpxor   xmm3,xmm3,xmm7
        add     ebp,DWORD[48+rsp]
        xor     esi,ecx
        mov     edi,eax
        shld    eax,eax,5
        vpxor   xmm3,xmm3,xmm4
        add     ebp,esi
        xor     edi,ecx
        vpaddd  xmm9,xmm11,xmm2
        shrd    ebx,ebx,7
        add     ebp,eax
        vpxor   xmm3,xmm3,xmm8
        add     edx,DWORD[52+rsp]
        xor     edi,ebx
        mov     esi,ebp
        shld    ebp,ebp,5
        vpsrld  xmm8,xmm3,30
        vmovdqa XMMWORD[32+rsp],xmm9
        add     edx,edi
        xor     esi,ebx
        shrd    eax,eax,7
        add     edx,ebp
        vpslld  xmm3,xmm3,2
        add     ecx,DWORD[56+rsp]
        xor     esi,eax
        mov     edi,edx
        shld    edx,edx,5
        add     ecx,esi
        xor     edi,eax
        shrd    ebp,ebp,7
        add     ecx,edx
        vpor    xmm3,xmm3,xmm8
        add     ebx,DWORD[60+rsp]
        xor     edi,ebp
        mov     esi,ecx
        shld    ecx,ecx,5
        add     ebx,edi
        xor     esi,ebp
        shrd    edx,edx,7
        add     ebx,ecx
        add     eax,DWORD[rsp]
        vpaddd  xmm9,xmm11,xmm3
        xor     esi,edx
        mov     edi,ebx
        shld    ebx,ebx,5
        add     eax,esi
        vmovdqa XMMWORD[48+rsp],xmm9
        xor     edi,edx
        shrd    ecx,ecx,7
        add     eax,ebx
        add     ebp,DWORD[4+rsp]
        xor     edi,ecx
        mov     esi,eax
        shld    eax,eax,5
        add     ebp,edi
        xor     esi,ecx
        shrd    ebx,ebx,7
        add     ebp,eax
        add     edx,DWORD[8+rsp]
        xor     esi,ebx
        mov     edi,ebp
        shld    ebp,ebp,5
        add     edx,esi
        xor     edi,ebx
        shrd    eax,eax,7
        add     edx,ebp
        add     ecx,DWORD[12+rsp]
        xor     edi,eax
        mov     esi,edx
        shld    edx,edx,5
        add     ecx,edi
        xor     esi,eax
        shrd    ebp,ebp,7
        add     ecx,edx
        cmp     r9,r10
        je      NEAR $L$done_avx
        vmovdqa xmm6,XMMWORD[64+r14]
        vmovdqa xmm11,XMMWORD[((-64))+r14]
        vmovdqu xmm0,XMMWORD[r9]
        vmovdqu xmm1,XMMWORD[16+r9]
        vmovdqu xmm2,XMMWORD[32+r9]
        vmovdqu xmm3,XMMWORD[48+r9]
        vpshufb xmm0,xmm0,xmm6
        add     r9,64
        add     ebx,DWORD[16+rsp]
        xor     esi,ebp
        vpshufb xmm1,xmm1,xmm6
        mov     edi,ecx
        shld    ecx,ecx,5
        vpaddd  xmm4,xmm0,xmm11
        add     ebx,esi
        xor     edi,ebp
        shrd    edx,edx,7
        add     ebx,ecx
        vmovdqa XMMWORD[rsp],xmm4
        add     eax,DWORD[20+rsp]
        xor     edi,edx
        mov     esi,ebx
        shld    ebx,ebx,5
        add     eax,edi
        xor     esi,edx
        shrd    ecx,ecx,7
        add     eax,ebx
        add     ebp,DWORD[24+rsp]
        xor     esi,ecx
        mov     edi,eax
        shld    eax,eax,5
        add     ebp,esi
        xor     edi,ecx
        shrd    ebx,ebx,7
        add     ebp,eax
        add     edx,DWORD[28+rsp]
        xor     edi,ebx
        mov     esi,ebp
        shld    ebp,ebp,5
        add     edx,edi
        xor     esi,ebx
        shrd    eax,eax,7
        add     edx,ebp
        add     ecx,DWORD[32+rsp]
        xor     esi,eax
        vpshufb xmm2,xmm2,xmm6
        mov     edi,edx
        shld    edx,edx,5
        vpaddd  xmm5,xmm1,xmm11
        add     ecx,esi
        xor     edi,eax
        shrd    ebp,ebp,7
        add     ecx,edx
        vmovdqa XMMWORD[16+rsp],xmm5
        add     ebx,DWORD[36+rsp]
        xor     edi,ebp
        mov     esi,ecx
        shld    ecx,ecx,5
        add     ebx,edi
        xor     esi,ebp
        shrd    edx,edx,7
        add     ebx,ecx
        add     eax,DWORD[40+rsp]
        xor     esi,edx
        mov     edi,ebx
        shld    ebx,ebx,5
        add     eax,esi
        xor     edi,edx
        shrd    ecx,ecx,7
        add     eax,ebx
        add     ebp,DWORD[44+rsp]
        xor     edi,ecx
        mov     esi,eax
        shld    eax,eax,5
        add     ebp,edi
        xor     esi,ecx
        shrd    ebx,ebx,7
        add     ebp,eax
        add     edx,DWORD[48+rsp]
        xor     esi,ebx
        vpshufb xmm3,xmm3,xmm6
        mov     edi,ebp
        shld    ebp,ebp,5
        vpaddd  xmm6,xmm2,xmm11
        add     edx,esi
        xor     edi,ebx
        shrd    eax,eax,7
        add     edx,ebp
        vmovdqa XMMWORD[32+rsp],xmm6
        add     ecx,DWORD[52+rsp]
        xor     edi,eax
        mov     esi,edx
        shld    edx,edx,5
        add     ecx,edi
        xor     esi,eax
        shrd    ebp,ebp,7
        add     ecx,edx
        add     ebx,DWORD[56+rsp]
        xor     esi,ebp
        mov     edi,ecx
        shld    ecx,ecx,5
        add     ebx,esi
        xor     edi,ebp
        shrd    edx,edx,7
        add     ebx,ecx
        add     eax,DWORD[60+rsp]
        xor     edi,edx
        mov     esi,ebx
        shld    ebx,ebx,5
        add     eax,edi
        shrd    ecx,ecx,7
        add     eax,ebx
        add     eax,DWORD[r8]
        add     esi,DWORD[4+r8]
        add     ecx,DWORD[8+r8]
        add     edx,DWORD[12+r8]
        mov     DWORD[r8],eax
        add     ebp,DWORD[16+r8]
        mov     DWORD[4+r8],esi
        mov     ebx,esi
        mov     DWORD[8+r8],ecx
        mov     edi,ecx
        mov     DWORD[12+r8],edx
        xor     edi,edx
        mov     DWORD[16+r8],ebp
        and     esi,edi
        jmp     NEAR $L$oop_avx

ALIGN   16
; ---------------------------------------------------------------------
; Final-block tail of the AVX path.
; Reached via the `je` that follows `cmp r9,r10` in the main loop, i.e.
; when r9 == r10 (presumably "input cursor reached end of input" --
; the setup of r9/r10 for this function is outside this view).
;
; Runs the 12 remaining scalar round steps, whose inputs are the dwords
; at [16+rsp]..[60+rsp] (written earlier by the vector code as
; schedule word + xmm11 constant), then adds the five working registers
; into the five dwords at [r8], restores the saved registers from the
; frame addressed by r11, and returns.
;
; Per round step: the target register receives the stack dword, a
; three-way XOR of three other working registers, and the rotate-left-5
; of a fourth.  `shld x,x,5` rotates x in place; the later
; `shrd x,x,7` on the same register undoes that and leaves a net
; rotate-right by 2.
; ---------------------------------------------------------------------
$L$done_avx:
        add     ebx,DWORD[16+rsp]
        xor     esi,ebp
        mov     edi,ecx
        shld    ecx,ecx,5
        add     ebx,esi
        xor     edi,ebp
        shrd    edx,edx,7
        add     ebx,ecx
        add     eax,DWORD[20+rsp]
        xor     edi,edx
        mov     esi,ebx
        shld    ebx,ebx,5
        add     eax,edi
        xor     esi,edx
        shrd    ecx,ecx,7
        add     eax,ebx
        add     ebp,DWORD[24+rsp]
        xor     esi,ecx
        mov     edi,eax
        shld    eax,eax,5
        add     ebp,esi
        xor     edi,ecx
        shrd    ebx,ebx,7
        add     ebp,eax
        add     edx,DWORD[28+rsp]
        xor     edi,ebx
        mov     esi,ebp
        shld    ebp,ebp,5
        add     edx,edi
        xor     esi,ebx
        shrd    eax,eax,7
        add     edx,ebp
        add     ecx,DWORD[32+rsp]
        xor     esi,eax
        mov     edi,edx
        shld    edx,edx,5
        add     ecx,esi
        xor     edi,eax
        shrd    ebp,ebp,7
        add     ecx,edx
        add     ebx,DWORD[36+rsp]
        xor     edi,ebp
        mov     esi,ecx
        shld    ecx,ecx,5
        add     ebx,edi
        xor     esi,ebp
        shrd    edx,edx,7
        add     ebx,ecx
        add     eax,DWORD[40+rsp]
        xor     esi,edx
        mov     edi,ebx
        shld    ebx,ebx,5
        add     eax,esi
        xor     edi,edx
        shrd    ecx,ecx,7
        add     eax,ebx
        add     ebp,DWORD[44+rsp]
        xor     edi,ecx
        mov     esi,eax
        shld    eax,eax,5
        add     ebp,edi
        xor     esi,ecx
        shrd    ebx,ebx,7
        add     ebp,eax
        add     edx,DWORD[48+rsp]
        xor     esi,ebx
        mov     edi,ebp
        shld    ebp,ebp,5
        add     edx,esi
        xor     edi,ebx
        shrd    eax,eax,7
        add     edx,ebp
        add     ecx,DWORD[52+rsp]
        xor     edi,eax
        mov     esi,edx
        shld    edx,edx,5
        add     ecx,edi
        xor     esi,eax
        shrd    ebp,ebp,7
        add     ecx,edx
        add     ebx,DWORD[56+rsp]
        xor     esi,ebp
        mov     edi,ecx
        shld    ecx,ecx,5
        add     ebx,esi
        xor     edi,ebp
        shrd    edx,edx,7
        add     ebx,ecx
        ; Last round step: esi keeps the un-rotated copy of ebx (taken
        ; before the shld), which is the value folded into [4+r8] below.
        add     eax,DWORD[60+rsp]
        xor     edi,edx
        mov     esi,ebx
        shld    ebx,ebx,5
        add     eax,edi
        shrd    ecx,ecx,7
        add     eax,ebx
        ; Vector work is finished; clear the upper YMM halves before the
        ; legacy-SSE movaps restores below.
        vzeroupper

        ; Fold the working registers into the five dwords at [r8]:
        ;   [r8]+=eax  [4+r8]+=esi  [8+r8]+=ecx  [12+r8]+=edx  [16+r8]+=ebp
        add     eax,DWORD[r8]
        add     esi,DWORD[4+r8]
        add     ecx,DWORD[8+r8]
        mov     DWORD[r8],eax
        add     edx,DWORD[12+r8]
        mov     DWORD[4+r8],esi
        add     ebp,DWORD[16+r8]
        mov     DWORD[8+r8],ecx
        mov     DWORD[12+r8],edx
        mov     DWORD[16+r8],ebp
        ; Restore xmm6-xmm11 (callee-saved under the Microsoft x64 ABI) from
        ; the 96-byte area below the five saved GPRs; r11 addresses the frame
        ; (presumably the entry rsp captured by this function's prologue,
        ; which is outside this view).
        movaps  xmm6,XMMWORD[((-40-96))+r11]
        movaps  xmm7,XMMWORD[((-40-80))+r11]
        movaps  xmm8,XMMWORD[((-40-64))+r11]
        movaps  xmm9,XMMWORD[((-40-48))+r11]
        movaps  xmm10,XMMWORD[((-40-32))+r11]
        movaps  xmm11,XMMWORD[((-40-16))+r11]
        ; Restore the five saved general-purpose registers, then unwind rsp.
        mov     r14,QWORD[((-40))+r11]

        mov     r13,QWORD[((-32))+r11]

        mov     r12,QWORD[((-24))+r11]

        mov     rbp,QWORD[((-16))+r11]

        mov     rbx,QWORD[((-8))+r11]

        lea     rsp,[r11]

$L$epilogue_avx:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret

$L$SEH_end_sha1_block_data_order_avx:

ALIGN   16
; ---------------------------------------------------------------------
; sha1_block_data_order_avx2 -- AVX2/BMI code path (entry and setup).
;
; Win64 entry: arguments arrive in rcx, rdx, r8 and are moved to
; rdi, rsi, rdx; the caller's rdi/rsi are parked at [8+rsp]/[16+rsp].
; The dispatcher at the top of the file jumps straight to
; _avx2_shortcut with rdi/rsi/rdx already set up.
;
; Register roles established here:
;   r8   = first argument; five 32-bit state words are read from [r8]
;   r9   = second argument, input cursor (advanced by 64 after the load)
;   r10  = r9 + (third argument << 6), i.e. end of input
;          (third argument is presumably a count of 64-byte blocks)
;   r13  = address of the block loaded into the upper 128-bit lanes,
;          later re-pointed at rsp+128 for the round loop
;   r14  = K_XX_XX+64 (constant table; its contents are outside this view)
;   r11  = rsp at _avx2_shortcut, base for the register save area
;   eax, ebp, ecx, edx, esi = state words [r8], [4+r8], [8+r8],
;                              [12+r8], [16+r8]
; ---------------------------------------------------------------------
sha1_block_data_order_avx2:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_sha1_block_data_order_avx2:
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8


_avx2_shortcut:

        ; Save area relative to r11: five GPR pushes occupy r11-8 .. r11-40,
        ; followed by 96 bytes for xmm6-xmm11 at r11-136 .. r11-56.
        mov     r11,rsp

        push    rbx

        push    rbp

        push    r12

        push    r13

        push    r14

        vzeroupper
        lea     rsp,[((-96))+rsp]
        vmovaps XMMWORD[(-40-96)+r11],xmm6
        vmovaps XMMWORD[(-40-80)+r11],xmm7
        vmovaps XMMWORD[(-40-64)+r11],xmm8
        vmovaps XMMWORD[(-40-48)+r11],xmm9
        vmovaps XMMWORD[(-40-32)+r11],xmm10
        vmovaps XMMWORD[(-40-16)+r11],xmm11
$L$prologue_avx2:
        mov     r8,rdi
        mov     r9,rsi
        mov     r10,rdx

        ; Reserve 640 bytes of scratch and align rsp down to 128 bytes.
        lea     rsp,[((-640))+rsp]
        shl     r10,6
        lea     r13,[64+r9]
        and     rsp,-128
        add     r10,r9
        lea     r14,[((K_XX_XX+64))]

        ; If r9+64 is at or past the end (unsigned compare), point r13 back
        ; at r9 so the upper-lane loads below stay inside the input.
        mov     eax,DWORD[r8]
        cmp     r13,r10
        cmovae  r13,r9
        mov     ebp,DWORD[4+r8]
        mov     ecx,DWORD[8+r8]
        mov     edx,DWORD[12+r8]
        mov     esi,DWORD[16+r8]
        ; ymm6 = vpshufb control mask (presumably a big-endian byte swap;
        ; table contents not visible here).
        vmovdqu ymm6,YMMWORD[64+r14]

        ; Low lanes of ymm0-ymm3 <- 64 bytes at r9; high lanes <- 64 bytes
        ; at r13.  Each register then holds four words from each block.
        vmovdqu xmm0,XMMWORD[r9]
        vmovdqu xmm1,XMMWORD[16+r9]
        vmovdqu xmm2,XMMWORD[32+r9]
        vmovdqu xmm3,XMMWORD[48+r9]
        lea     r9,[64+r9]
        vinserti128     ymm0,ymm0,XMMWORD[r13],1
        vinserti128     ymm1,ymm1,XMMWORD[16+r13],1
        vpshufb ymm0,ymm0,ymm6
        vinserti128     ymm2,ymm2,XMMWORD[32+r13],1
        vpshufb ymm1,ymm1,ymm6
        vinserti128     ymm3,ymm3,XMMWORD[48+r13],1
        vpshufb ymm2,ymm2,ymm6
        ; ymm11 = constant added to every schedule word before it is spilled
        ; (presumably the current round constant K).
        vmovdqu ymm11,YMMWORD[((-64))+r14]
        vpshufb ymm3,ymm3,ymm6

        ; Spill (word + ymm11) for the first 16 words to [rsp]..[96+rsp].
        ; ymm4-ymm7 are only temporaries here; they are overwritten below.
        vpaddd  ymm4,ymm0,ymm11
        vpaddd  ymm5,ymm1,ymm11
        vmovdqu YMMWORD[rsp],ymm4
        vpaddd  ymm6,ymm2,ymm11
        vmovdqu YMMWORD[32+rsp],ymm5
        vpaddd  ymm7,ymm3,ymm11
        vmovdqu YMMWORD[64+rsp],ymm6
        vmovdqu YMMWORD[96+rsp],ymm7
        ; Schedule expansion, four new words per lane into ymm4:
        ;   t = alignr(ymm1:ymm0, 8) ^ ymm0 ^ (ymm3 >> 4 bytes) ^ ymm2
        ;   ymm4 = rol(t,1), with the top dword additionally XORed with
        ;   rol(t[0],2) (the vpslldq-by-12 / shift-by-30 / shift-by-2 terms),
        ;   because that word depends on the first new word of this group.
        vpalignr        ymm4,ymm1,ymm0,8
        vpsrldq ymm8,ymm3,4
        vpxor   ymm4,ymm4,ymm0
        vpxor   ymm8,ymm8,ymm2
        vpxor   ymm4,ymm4,ymm8
        vpsrld  ymm8,ymm4,31
        vpslldq ymm10,ymm4,12
        vpaddd  ymm4,ymm4,ymm4
        vpsrld  ymm9,ymm10,30
        vpor    ymm4,ymm4,ymm8
        vpslld  ymm10,ymm10,2
        vpxor   ymm4,ymm4,ymm9
        vpxor   ymm4,ymm4,ymm10
        vpaddd  ymm9,ymm4,ymm11
        vmovdqu YMMWORD[128+rsp],ymm9
        ; Same expansion step producing ymm5; ymm11 is switched to the next
        ; table entry part-way through, so the spill at [160+rsp] uses it.
        vpalignr        ymm5,ymm2,ymm1,8
        vpsrldq ymm8,ymm4,4
        vpxor   ymm5,ymm5,ymm1
        vpxor   ymm8,ymm8,ymm3
        vpxor   ymm5,ymm5,ymm8
        vpsrld  ymm8,ymm5,31
        vmovdqu ymm11,YMMWORD[((-32))+r14]
        vpslldq ymm10,ymm5,12
        vpaddd  ymm5,ymm5,ymm5
        vpsrld  ymm9,ymm10,30
        vpor    ymm5,ymm5,ymm8
        vpslld  ymm10,ymm10,2
        vpxor   ymm5,ymm5,ymm9
        vpxor   ymm5,ymm5,ymm10
        vpaddd  ymm9,ymm5,ymm11
        vmovdqu YMMWORD[160+rsp],ymm9
        ; Same expansion step producing ymm6, spilled to [192+rsp].
        vpalignr        ymm6,ymm3,ymm2,8
        vpsrldq ymm8,ymm5,4
        vpxor   ymm6,ymm6,ymm2
        vpxor   ymm8,ymm8,ymm4
        vpxor   ymm6,ymm6,ymm8
        vpsrld  ymm8,ymm6,31
        vpslldq ymm10,ymm6,12
        vpaddd  ymm6,ymm6,ymm6
        vpsrld  ymm9,ymm10,30
        vpor    ymm6,ymm6,ymm8
        vpslld  ymm10,ymm10,2
        vpxor   ymm6,ymm6,ymm9
        vpxor   ymm6,ymm6,ymm10
        vpaddd  ymm9,ymm6,ymm11
        vmovdqu YMMWORD[192+rsp],ymm9
        ; Same expansion step producing ymm7, spilled to [224+rsp].
        vpalignr        ymm7,ymm4,ymm3,8
        vpsrldq ymm8,ymm6,4
        vpxor   ymm7,ymm7,ymm3
        vpxor   ymm8,ymm8,ymm5
        vpxor   ymm7,ymm7,ymm8
        vpsrld  ymm8,ymm7,31
        vpslldq ymm10,ymm7,12
        vpaddd  ymm7,ymm7,ymm7
        vpsrld  ymm9,ymm10,30
        vpor    ymm7,ymm7,ymm8
        vpslld  ymm10,ymm10,2
        vpxor   ymm7,ymm7,ymm9
        vpxor   ymm7,ymm7,ymm10
        vpaddd  ymm9,ymm7,ymm11
        vmovdqu YMMWORD[224+rsp],ymm9
        ; r13 becomes the biased pointer into the spill area used by the
        ; round loop ([-128+r13] == [rsp]).
        lea     r13,[128+rsp]
        jmp     NEAR $L$oop_avx2
ALIGN   32
; Loop head of the AVX2 path: prepare the inputs of the first round step.
;   ebx = ebp rotated right by 2
;   ebp = (ebp & ecx) ^ (~ebp & edx)
; The jmp lands on the next 32-byte-aligned label, so whatever padding
; ALIGN emits in between is never executed.
$L$oop_avx2:
        rorx    ebx,ebp,2
        andn    edi,ebp,edx
        and     ebp,ecx
        xor     ebp,edi
        jmp     NEAR $L$align32_1
ALIGN   32
; ---------------------------------------------------------------------
; First 40 scalar round steps of one block, interleaved with vector
; schedule expansion for later rounds.
;
; Scalar side: each step adds a dword from the spill area (addressed
; through r13), the previously prepared boolean term, and the
; rotate-left-5 (rorx ..,27) of another working register, and prepares
; the rotate-right-2 (rorx ..,2) and the boolean term for the next step.
; Only offsets k..k+12 of each 32-byte slot are read, i.e. the low
; 128-bit lane of every spilled ymm value.
;   - steps reading [-128+r13]..[12+r13] consume the select term
;     (x & y) ^ (~x & z) built with andn/and/xor;
;   - from [32+r13] on the term is the three-way XOR; r12d/edi are
;     scratch.
;
; Vector side: each group computes
;     ymmN = rol( ymmN ^ ymm(N+4) ^ ymm(N+1) ^ alignr(prev:prev2, 8), 2 )
; then spills ymmN + ymm11 to the next 32-byte slot ([256+rsp] upward).
; ---------------------------------------------------------------------
$L$align32_1:
        vpalignr        ymm8,ymm7,ymm6,8
        vpxor   ymm0,ymm0,ymm4
        add     esi,DWORD[((-128))+r13]
        andn    edi,eax,ecx
        vpxor   ymm0,ymm0,ymm1
        add     esi,ebp
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        vpxor   ymm0,ymm0,ymm8
        and     eax,ebx
        add     esi,r12d
        xor     eax,edi
        vpsrld  ymm8,ymm0,30
        vpslld  ymm0,ymm0,2
        add     edx,DWORD[((-124))+r13]
        andn    edi,esi,ebx
        add     edx,eax
        rorx    r12d,esi,27
        rorx    eax,esi,2
        and     esi,ebp
        vpor    ymm0,ymm0,ymm8
        add     edx,r12d
        xor     esi,edi
        add     ecx,DWORD[((-120))+r13]
        andn    edi,edx,ebp
        vpaddd  ymm9,ymm0,ymm11
        add     ecx,esi
        rorx    r12d,edx,27
        rorx    esi,edx,2
        and     edx,eax
        vmovdqu YMMWORD[256+rsp],ymm9
        add     ecx,r12d
        xor     edx,edi
        add     ebx,DWORD[((-116))+r13]
        andn    edi,ecx,eax
        add     ebx,edx
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        and     ecx,esi
        add     ebx,r12d
        xor     ecx,edi
        add     ebp,DWORD[((-96))+r13]
        andn    edi,ebx,esi
        add     ebp,ecx
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        and     ebx,edx
        add     ebp,r12d
        xor     ebx,edi
        vpalignr        ymm8,ymm0,ymm7,8
        vpxor   ymm1,ymm1,ymm5
        add     eax,DWORD[((-92))+r13]
        andn    edi,ebp,edx
        vpxor   ymm1,ymm1,ymm2
        add     eax,ebx
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        vpxor   ymm1,ymm1,ymm8
        and     ebp,ecx
        add     eax,r12d
        xor     ebp,edi
        vpsrld  ymm8,ymm1,30
        vpslld  ymm1,ymm1,2
        add     esi,DWORD[((-88))+r13]
        andn    edi,eax,ecx
        add     esi,ebp
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        and     eax,ebx
        vpor    ymm1,ymm1,ymm8
        add     esi,r12d
        xor     eax,edi
        add     edx,DWORD[((-84))+r13]
        andn    edi,esi,ebx
        vpaddd  ymm9,ymm1,ymm11
        add     edx,eax
        rorx    r12d,esi,27
        rorx    eax,esi,2
        and     esi,ebp
        vmovdqu YMMWORD[288+rsp],ymm9
        add     edx,r12d
        xor     esi,edi
        add     ecx,DWORD[((-64))+r13]
        andn    edi,edx,ebp
        add     ecx,esi
        rorx    r12d,edx,27
        rorx    esi,edx,2
        and     edx,eax
        add     ecx,r12d
        xor     edx,edi
        add     ebx,DWORD[((-60))+r13]
        andn    edi,ecx,eax
        add     ebx,edx
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        and     ecx,esi
        add     ebx,r12d
        xor     ecx,edi
        vpalignr        ymm8,ymm1,ymm0,8
        vpxor   ymm2,ymm2,ymm6
        add     ebp,DWORD[((-56))+r13]
        andn    edi,ebx,esi
        vpxor   ymm2,ymm2,ymm3
        ; Switch ymm11 to the table entry at [r14] for the spills that follow.
        vmovdqu ymm11,YMMWORD[r14]
        add     ebp,ecx
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        vpxor   ymm2,ymm2,ymm8
        and     ebx,edx
        add     ebp,r12d
        xor     ebx,edi
        vpsrld  ymm8,ymm2,30
        vpslld  ymm2,ymm2,2
        add     eax,DWORD[((-52))+r13]
        andn    edi,ebp,edx
        add     eax,ebx
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        and     ebp,ecx
        vpor    ymm2,ymm2,ymm8
        add     eax,r12d
        xor     ebp,edi
        add     esi,DWORD[((-32))+r13]
        andn    edi,eax,ecx
        vpaddd  ymm9,ymm2,ymm11
        add     esi,ebp
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        and     eax,ebx
        vmovdqu YMMWORD[320+rsp],ymm9
        add     esi,r12d
        xor     eax,edi
        add     edx,DWORD[((-28))+r13]
        andn    edi,esi,ebx
        add     edx,eax
        rorx    r12d,esi,27
        rorx    eax,esi,2
        and     esi,ebp
        add     edx,r12d
        xor     esi,edi
        add     ecx,DWORD[((-24))+r13]
        andn    edi,edx,ebp
        add     ecx,esi
        rorx    r12d,edx,27
        rorx    esi,edx,2
        and     edx,eax
        add     ecx,r12d
        xor     edx,edi
        vpalignr        ymm8,ymm2,ymm1,8
        vpxor   ymm3,ymm3,ymm7
        add     ebx,DWORD[((-20))+r13]
        andn    edi,ecx,eax
        vpxor   ymm3,ymm3,ymm4
        add     ebx,edx
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        vpxor   ymm3,ymm3,ymm8
        and     ecx,esi
        add     ebx,r12d
        xor     ecx,edi
        vpsrld  ymm8,ymm3,30
        vpslld  ymm3,ymm3,2
        add     ebp,DWORD[r13]
        andn    edi,ebx,esi
        add     ebp,ecx
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        and     ebx,edx
        vpor    ymm3,ymm3,ymm8
        add     ebp,r12d
        xor     ebx,edi
        add     eax,DWORD[4+r13]
        andn    edi,ebp,edx
        vpaddd  ymm9,ymm3,ymm11
        add     eax,ebx
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        and     ebp,ecx
        vmovdqu YMMWORD[352+rsp],ymm9
        add     eax,r12d
        xor     ebp,edi
        add     esi,DWORD[8+r13]
        andn    edi,eax,ecx
        add     esi,ebp
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        and     eax,ebx
        add     esi,r12d
        xor     eax,edi
        ; Last step that consumes a select term; from here the term prepared
        ; for the following step is the three-way XOR (two xor instructions).
        add     edx,DWORD[12+r13]
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        xor     esi,ebx
        vpalignr        ymm8,ymm3,ymm2,8
        vpxor   ymm4,ymm4,ymm0
        add     ecx,DWORD[32+r13]
        lea     ecx,[rsi*1+rcx]
        vpxor   ymm4,ymm4,ymm5
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        vpxor   ymm4,ymm4,ymm8
        add     ecx,r12d
        xor     edx,ebp
        add     ebx,DWORD[36+r13]
        vpsrld  ymm8,ymm4,30
        vpslld  ymm4,ymm4,2
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        xor     ecx,eax
        vpor    ymm4,ymm4,ymm8
        add     ebp,DWORD[40+r13]
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        vpaddd  ymm9,ymm4,ymm11
        xor     ebx,edx
        add     ebp,r12d
        xor     ebx,esi
        add     eax,DWORD[44+r13]
        vmovdqu YMMWORD[384+rsp],ymm9
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        xor     ebp,edx
        add     esi,DWORD[64+r13]
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        xor     eax,ecx
        vpalignr        ymm8,ymm4,ymm3,8
        vpxor   ymm5,ymm5,ymm1
        add     edx,DWORD[68+r13]
        lea     edx,[rax*1+rdx]
        vpxor   ymm5,ymm5,ymm6
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        vpxor   ymm5,ymm5,ymm8
        add     edx,r12d
        xor     esi,ebx
        add     ecx,DWORD[72+r13]
        vpsrld  ymm8,ymm5,30
        vpslld  ymm5,ymm5,2
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        xor     edx,ebp
        vpor    ymm5,ymm5,ymm8
        add     ebx,DWORD[76+r13]
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        vpaddd  ymm9,ymm5,ymm11
        xor     ecx,esi
        add     ebx,r12d
        xor     ecx,eax
        add     ebp,DWORD[96+r13]
        vmovdqu YMMWORD[416+rsp],ymm9
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        xor     ebx,esi
        add     eax,DWORD[100+r13]
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        xor     ebp,edx
        vpalignr        ymm8,ymm5,ymm4,8
        vpxor   ymm6,ymm6,ymm2
        add     esi,DWORD[104+r13]
        lea     esi,[rbp*1+rsi]
        vpxor   ymm6,ymm6,ymm7
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        vpxor   ymm6,ymm6,ymm8
        add     esi,r12d
        xor     eax,ecx
        add     edx,DWORD[108+r13]
        ; Advance the spill-area pointer by 256 bytes; the following reads at
        ; [-128+r13] continue at the slot right after the one just consumed.
        lea     r13,[256+r13]
        vpsrld  ymm8,ymm6,30
        vpslld  ymm6,ymm6,2
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        xor     esi,ebx
        vpor    ymm6,ymm6,ymm8
        add     ecx,DWORD[((-128))+r13]
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        vpaddd  ymm9,ymm6,ymm11
        xor     edx,eax
        add     ecx,r12d
        xor     edx,ebp
        add     ebx,DWORD[((-124))+r13]
        vmovdqu YMMWORD[448+rsp],ymm9
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        xor     ecx,eax
        add     ebp,DWORD[((-120))+r13]
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        xor     ebx,esi
        vpalignr        ymm8,ymm6,ymm5,8
        vpxor   ymm7,ymm7,ymm3
        add     eax,DWORD[((-116))+r13]
        lea     eax,[rbx*1+rax]
        vpxor   ymm7,ymm7,ymm0
        ; Switch ymm11 to the table entry at [32+r14] for the spills that follow.
        vmovdqu ymm11,YMMWORD[32+r14]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        vpxor   ymm7,ymm7,ymm8
        add     eax,r12d
        xor     ebp,edx
        add     esi,DWORD[((-96))+r13]
        vpsrld  ymm8,ymm7,30
        vpslld  ymm7,ymm7,2
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        xor     eax,ecx
        vpor    ymm7,ymm7,ymm8
        add     edx,DWORD[((-92))+r13]
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        vpaddd  ymm9,ymm7,ymm11
        xor     esi,ebp
        add     edx,r12d
        xor     esi,ebx
        add     ecx,DWORD[((-88))+r13]
        vmovdqu YMMWORD[480+rsp],ymm9
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        xor     edx,ebp
        ; Last XOR-term step of this group.  It also starts the next step's
        ; boolean term in a different form: ecx = (ecx ^ esi) & (esi ^ eax),
        ; which the code after the jump completes with a further xor.
        add     ebx,DWORD[((-84))+r13]
        mov     edi,esi
        xor     edi,eax
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        and     ecx,edi
        jmp     NEAR $L$align32_2
ALIGN   32
$L$align32_2:
        vpalignr        ymm8,ymm7,ymm6,8
        vpxor   ymm0,ymm0,ymm4
        add     ebp,DWORD[((-64))+r13]
        xor     ecx,esi
        vpxor   ymm0,ymm0,ymm1
        mov     edi,edx
        xor     edi,esi
        lea     ebp,[rbp*1+rcx]
        vpxor   ymm0,ymm0,ymm8
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        vpsrld  ymm8,ymm0,30
        vpslld  ymm0,ymm0,2
        add     ebp,r12d
        and     ebx,edi
        add     eax,DWORD[((-60))+r13]
        xor     ebx,edx
        mov     edi,ecx
        xor     edi,edx
        vpor    ymm0,ymm0,ymm8
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        vpaddd  ymm9,ymm0,ymm11
        add     eax,r12d
        and     ebp,edi
        add     esi,DWORD[((-56))+r13]
        xor     ebp,ecx
        vmovdqu YMMWORD[512+rsp],ymm9
        mov     edi,ebx
        xor     edi,ecx
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        and     eax,edi
        add     edx,DWORD[((-52))+r13]
        xor     eax,ebx
        mov     edi,ebp
        xor     edi,ebx
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        and     esi,edi
        add     ecx,DWORD[((-32))+r13]
        xor     esi,ebp
        mov     edi,eax
        xor     edi,ebp
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        and     edx,edi
        vpalignr        ymm8,ymm0,ymm7,8
        vpxor   ymm1,ymm1,ymm5
        add     ebx,DWORD[((-28))+r13]
        xor     edx,eax
        vpxor   ymm1,ymm1,ymm2
        mov     edi,esi
        xor     edi,eax
        lea     ebx,[rdx*1+rbx]
        vpxor   ymm1,ymm1,ymm8
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        vpsrld  ymm8,ymm1,30
        vpslld  ymm1,ymm1,2
        add     ebx,r12d
        and     ecx,edi
        add     ebp,DWORD[((-24))+r13]
        xor     ecx,esi
        mov     edi,edx
        xor     edi,esi
        vpor    ymm1,ymm1,ymm8
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        vpaddd  ymm9,ymm1,ymm11
        add     ebp,r12d
        and     ebx,edi
        add     eax,DWORD[((-20))+r13]
        xor     ebx,edx
        vmovdqu YMMWORD[544+rsp],ymm9
        mov     edi,ecx
        xor     edi,edx
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        and     ebp,edi
        add     esi,DWORD[r13]
        xor     ebp,ecx
        mov     edi,ebx
        xor     edi,ecx
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        and     eax,edi
        add     edx,DWORD[4+r13]
        xor     eax,ebx
        mov     edi,ebp
        xor     edi,ebx
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        and     esi,edi
        vpalignr        ymm8,ymm1,ymm0,8
        vpxor   ymm2,ymm2,ymm6
        add     ecx,DWORD[8+r13]
        xor     esi,ebp
        vpxor   ymm2,ymm2,ymm3
        mov     edi,eax
        xor     edi,ebp
        lea     ecx,[rsi*1+rcx]
        vpxor   ymm2,ymm2,ymm8
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        vpsrld  ymm8,ymm2,30
        vpslld  ymm2,ymm2,2
        add     ecx,r12d
        and     edx,edi
        add     ebx,DWORD[12+r13]
        xor     edx,eax
        mov     edi,esi
        xor     edi,eax
        vpor    ymm2,ymm2,ymm8
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        vpaddd  ymm9,ymm2,ymm11
        add     ebx,r12d
        and     ecx,edi
        add     ebp,DWORD[32+r13]
        xor     ecx,esi
        vmovdqu YMMWORD[576+rsp],ymm9
        mov     edi,edx
        xor     edi,esi
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        and     ebx,edi
        add     eax,DWORD[36+r13]
        xor     ebx,edx
        mov     edi,ecx
        xor     edi,edx
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        and     ebp,edi
        add     esi,DWORD[40+r13]
        xor     ebp,ecx
        mov     edi,ebx
        xor     edi,ecx
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        and     eax,edi
        vpalignr        ymm8,ymm2,ymm1,8
        vpxor   ymm3,ymm3,ymm7
        add     edx,DWORD[44+r13]
        xor     eax,ebx
        vpxor   ymm3,ymm3,ymm4
        mov     edi,ebp
        xor     edi,ebx
        lea     edx,[rax*1+rdx]
        vpxor   ymm3,ymm3,ymm8
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        vpsrld  ymm8,ymm3,30
        vpslld  ymm3,ymm3,2
        add     edx,r12d
        and     esi,edi
        add     ecx,DWORD[64+r13]
        xor     esi,ebp
        mov     edi,eax
        xor     edi,ebp
        vpor    ymm3,ymm3,ymm8
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        vpaddd  ymm9,ymm3,ymm11
        add     ecx,r12d
        and     edx,edi
        add     ebx,DWORD[68+r13]
        xor     edx,eax
        vmovdqu YMMWORD[608+rsp],ymm9
        mov     edi,esi
        xor     edi,eax
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        and     ecx,edi
        add     ebp,DWORD[72+r13]
        xor     ecx,esi
        mov     edi,edx
        xor     edi,esi
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        and     ebx,edi
        add     eax,DWORD[76+r13]
        xor     ebx,edx
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        xor     ebp,edx
        add     esi,DWORD[96+r13]
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        xor     eax,ecx
        add     edx,DWORD[100+r13]
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        xor     esi,ebx
        add     ecx,DWORD[104+r13]
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        xor     edx,ebp
        add     ebx,DWORD[108+r13]
        lea     r13,[256+r13]
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        xor     ecx,eax
        add     ebp,DWORD[((-128))+r13]
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        xor     ebx,esi
        add     eax,DWORD[((-124))+r13]
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        xor     ebp,edx
        add     esi,DWORD[((-120))+r13]
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        xor     eax,ecx
        add     edx,DWORD[((-116))+r13]
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        xor     esi,ebx
        add     ecx,DWORD[((-96))+r13]
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        xor     edx,ebp
        add     ebx,DWORD[((-92))+r13]
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        xor     ecx,eax
        add     ebp,DWORD[((-88))+r13]
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        xor     ebx,esi
        add     eax,DWORD[((-84))+r13]
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        xor     ebp,edx
        add     esi,DWORD[((-64))+r13]
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        xor     eax,ecx
        add     edx,DWORD[((-60))+r13]
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        xor     esi,ebx
        add     ecx,DWORD[((-56))+r13]
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        xor     edx,ebp
        add     ebx,DWORD[((-52))+r13]
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        xor     ecx,eax
        add     ebp,DWORD[((-32))+r13]
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        xor     ebx,esi
        add     eax,DWORD[((-28))+r13]
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        xor     ebp,edx
        add     esi,DWORD[((-24))+r13]
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        xor     eax,ecx
        add     edx,DWORD[((-20))+r13]
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        add     edx,r12d
        lea     r13,[128+r9]
        lea     rdi,[128+r9]
        cmp     r13,r10
        cmovae  r13,r9


        add     edx,DWORD[r8]
        add     esi,DWORD[4+r8]
        add     ebp,DWORD[8+r8]
        mov     DWORD[r8],edx
        add     ebx,DWORD[12+r8]
        mov     DWORD[4+r8],esi
        mov     eax,edx
        add     ecx,DWORD[16+r8]
        mov     r12d,ebp
        mov     DWORD[8+r8],ebp
        mov     edx,ebx

        mov     DWORD[12+r8],ebx
        mov     ebp,esi
        mov     DWORD[16+r8],ecx

        mov     esi,ecx
        mov     ecx,r12d


        cmp     r9,r10
        je      NEAR $L$done_avx2
        vmovdqu ymm6,YMMWORD[64+r14]
        cmp     rdi,r10
        ja      NEAR $L$ast_avx2

        vmovdqu xmm0,XMMWORD[((-64))+rdi]
        vmovdqu xmm1,XMMWORD[((-48))+rdi]
        vmovdqu xmm2,XMMWORD[((-32))+rdi]
        vmovdqu xmm3,XMMWORD[((-16))+rdi]
        vinserti128     ymm0,ymm0,XMMWORD[r13],1
        vinserti128     ymm1,ymm1,XMMWORD[16+r13],1
        vinserti128     ymm2,ymm2,XMMWORD[32+r13],1
        vinserti128     ymm3,ymm3,XMMWORD[48+r13],1
        jmp     NEAR $L$ast_avx2

ALIGN   32
$L$ast_avx2:
        lea     r13,[((128+16))+rsp]
        rorx    ebx,ebp,2
        andn    edi,ebp,edx
        and     ebp,ecx
        xor     ebp,edi
        sub     r9,-128
        add     esi,DWORD[((-128))+r13]
        andn    edi,eax,ecx
        add     esi,ebp
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        and     eax,ebx
        add     esi,r12d
        xor     eax,edi
        add     edx,DWORD[((-124))+r13]
        andn    edi,esi,ebx
        add     edx,eax
        rorx    r12d,esi,27
        rorx    eax,esi,2
        and     esi,ebp
        add     edx,r12d
        xor     esi,edi
        add     ecx,DWORD[((-120))+r13]
        andn    edi,edx,ebp
        add     ecx,esi
        rorx    r12d,edx,27
        rorx    esi,edx,2
        and     edx,eax
        add     ecx,r12d
        xor     edx,edi
        add     ebx,DWORD[((-116))+r13]
        andn    edi,ecx,eax
        add     ebx,edx
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        and     ecx,esi
        add     ebx,r12d
        xor     ecx,edi
        add     ebp,DWORD[((-96))+r13]
        andn    edi,ebx,esi
        add     ebp,ecx
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        and     ebx,edx
        add     ebp,r12d
        xor     ebx,edi
        add     eax,DWORD[((-92))+r13]
        andn    edi,ebp,edx
        add     eax,ebx
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        and     ebp,ecx
        add     eax,r12d
        xor     ebp,edi
        add     esi,DWORD[((-88))+r13]
        andn    edi,eax,ecx
        add     esi,ebp
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        and     eax,ebx
        add     esi,r12d
        xor     eax,edi
        add     edx,DWORD[((-84))+r13]
        andn    edi,esi,ebx
        add     edx,eax
        rorx    r12d,esi,27
        rorx    eax,esi,2
        and     esi,ebp
        add     edx,r12d
        xor     esi,edi
        add     ecx,DWORD[((-64))+r13]
        andn    edi,edx,ebp
        add     ecx,esi
        rorx    r12d,edx,27
        rorx    esi,edx,2
        and     edx,eax
        add     ecx,r12d
        xor     edx,edi
        add     ebx,DWORD[((-60))+r13]
        andn    edi,ecx,eax
        add     ebx,edx
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        and     ecx,esi
        add     ebx,r12d
        xor     ecx,edi
        add     ebp,DWORD[((-56))+r13]
        andn    edi,ebx,esi
        add     ebp,ecx
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        and     ebx,edx
        add     ebp,r12d
        xor     ebx,edi
        add     eax,DWORD[((-52))+r13]
        andn    edi,ebp,edx
        add     eax,ebx
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        and     ebp,ecx
        add     eax,r12d
        xor     ebp,edi
        add     esi,DWORD[((-32))+r13]
        andn    edi,eax,ecx
        add     esi,ebp
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        and     eax,ebx
        add     esi,r12d
        xor     eax,edi
        add     edx,DWORD[((-28))+r13]
        andn    edi,esi,ebx
        add     edx,eax
        rorx    r12d,esi,27
        rorx    eax,esi,2
        and     esi,ebp
        add     edx,r12d
        xor     esi,edi
        add     ecx,DWORD[((-24))+r13]
        andn    edi,edx,ebp
        add     ecx,esi
        rorx    r12d,edx,27
        rorx    esi,edx,2
        and     edx,eax
        add     ecx,r12d
        xor     edx,edi
        add     ebx,DWORD[((-20))+r13]
        andn    edi,ecx,eax
        add     ebx,edx
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        and     ecx,esi
        add     ebx,r12d
        xor     ecx,edi
        add     ebp,DWORD[r13]
        andn    edi,ebx,esi
        add     ebp,ecx
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        and     ebx,edx
        add     ebp,r12d
        xor     ebx,edi
        add     eax,DWORD[4+r13]
        andn    edi,ebp,edx
        add     eax,ebx
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        and     ebp,ecx
        add     eax,r12d
        xor     ebp,edi
        add     esi,DWORD[8+r13]
        andn    edi,eax,ecx
        add     esi,ebp
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        and     eax,ebx
        add     esi,r12d
        xor     eax,edi
        add     edx,DWORD[12+r13]
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        xor     esi,ebx
        add     ecx,DWORD[32+r13]
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        xor     edx,ebp
        add     ebx,DWORD[36+r13]
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        xor     ecx,eax
        add     ebp,DWORD[40+r13]
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        xor     ebx,esi
        add     eax,DWORD[44+r13]
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        xor     ebp,edx
        add     esi,DWORD[64+r13]
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        xor     eax,ecx
        vmovdqu ymm11,YMMWORD[((-64))+r14]
        vpshufb ymm0,ymm0,ymm6
        add     edx,DWORD[68+r13]
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        xor     esi,ebx
        add     ecx,DWORD[72+r13]
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        xor     edx,ebp
        add     ebx,DWORD[76+r13]
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        xor     ecx,eax
        add     ebp,DWORD[96+r13]
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        xor     ebx,esi
        add     eax,DWORD[100+r13]
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        xor     ebp,edx
        vpshufb ymm1,ymm1,ymm6
        vpaddd  ymm8,ymm0,ymm11
        add     esi,DWORD[104+r13]
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        xor     eax,ecx
        add     edx,DWORD[108+r13]
        lea     r13,[256+r13]
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        xor     esi,ebx
        add     ecx,DWORD[((-128))+r13]
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        xor     edx,ebp
        add     ebx,DWORD[((-124))+r13]
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        xor     ecx,eax
        add     ebp,DWORD[((-120))+r13]
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        xor     ebx,esi
        vmovdqu YMMWORD[rsp],ymm8
        vpshufb ymm2,ymm2,ymm6
        vpaddd  ymm9,ymm1,ymm11
        add     eax,DWORD[((-116))+r13]
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        xor     ebp,edx
        add     esi,DWORD[((-96))+r13]
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        xor     eax,ecx
        add     edx,DWORD[((-92))+r13]
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        xor     esi,ebx
        add     ecx,DWORD[((-88))+r13]
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        xor     edx,ebp
        add     ebx,DWORD[((-84))+r13]
        mov     edi,esi
        xor     edi,eax
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        and     ecx,edi
        vmovdqu YMMWORD[32+rsp],ymm9
        vpshufb ymm3,ymm3,ymm6
        vpaddd  ymm6,ymm2,ymm11
        add     ebp,DWORD[((-64))+r13]
        xor     ecx,esi
        mov     edi,edx
        xor     edi,esi
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        and     ebx,edi
        add     eax,DWORD[((-60))+r13]
        xor     ebx,edx
        mov     edi,ecx
        xor     edi,edx
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        and     ebp,edi
        add     esi,DWORD[((-56))+r13]
        xor     ebp,ecx
        mov     edi,ebx
        xor     edi,ecx
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        and     eax,edi
        add     edx,DWORD[((-52))+r13]
        xor     eax,ebx
        mov     edi,ebp
        xor     edi,ebx
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        and     esi,edi
        add     ecx,DWORD[((-32))+r13]
        xor     esi,ebp
        mov     edi,eax
        xor     edi,ebp
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        and     edx,edi
        jmp     NEAR $L$align32_3
ALIGN   32
$L$align32_3:
        vmovdqu YMMWORD[64+rsp],ymm6
        vpaddd  ymm7,ymm3,ymm11
        add     ebx,DWORD[((-28))+r13]
        xor     edx,eax
        mov     edi,esi
        xor     edi,eax
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        and     ecx,edi
        add     ebp,DWORD[((-24))+r13]
        xor     ecx,esi
        mov     edi,edx
        xor     edi,esi
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        and     ebx,edi
        add     eax,DWORD[((-20))+r13]
        xor     ebx,edx
        mov     edi,ecx
        xor     edi,edx
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        and     ebp,edi
        add     esi,DWORD[r13]
        xor     ebp,ecx
        mov     edi,ebx
        xor     edi,ecx
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        and     eax,edi
        add     edx,DWORD[4+r13]
        xor     eax,ebx
        mov     edi,ebp
        xor     edi,ebx
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        and     esi,edi
        vmovdqu YMMWORD[96+rsp],ymm7
        add     ecx,DWORD[8+r13]
        xor     esi,ebp
        mov     edi,eax
        xor     edi,ebp
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        and     edx,edi
        add     ebx,DWORD[12+r13]
        xor     edx,eax
        mov     edi,esi
        xor     edi,eax
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        and     ecx,edi
        add     ebp,DWORD[32+r13]
        xor     ecx,esi
        mov     edi,edx
        xor     edi,esi
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        and     ebx,edi
        add     eax,DWORD[36+r13]
        xor     ebx,edx
        mov     edi,ecx
        xor     edi,edx
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        and     ebp,edi
        add     esi,DWORD[40+r13]
        xor     ebp,ecx
        mov     edi,ebx
        xor     edi,ecx
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        and     eax,edi
        vpalignr        ymm4,ymm1,ymm0,8
        add     edx,DWORD[44+r13]
        xor     eax,ebx
        mov     edi,ebp
        xor     edi,ebx
        vpsrldq ymm8,ymm3,4
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        vpxor   ymm4,ymm4,ymm0
        vpxor   ymm8,ymm8,ymm2
        xor     esi,ebp
        add     edx,r12d
        vpxor   ymm4,ymm4,ymm8
        and     esi,edi
        add     ecx,DWORD[64+r13]
        xor     esi,ebp
        mov     edi,eax
        vpsrld  ymm8,ymm4,31
        xor     edi,ebp
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        vpslldq ymm10,ymm4,12
        vpaddd  ymm4,ymm4,ymm4
        rorx    esi,edx,2
        xor     edx,eax
        vpsrld  ymm9,ymm10,30
        vpor    ymm4,ymm4,ymm8
        add     ecx,r12d
        and     edx,edi
        vpslld  ymm10,ymm10,2
        vpxor   ymm4,ymm4,ymm9
        add     ebx,DWORD[68+r13]
        xor     edx,eax
        vpxor   ymm4,ymm4,ymm10
        mov     edi,esi
        xor     edi,eax
        lea     ebx,[rdx*1+rbx]
        vpaddd  ymm9,ymm4,ymm11
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        vmovdqu YMMWORD[128+rsp],ymm9
        add     ebx,r12d
        and     ecx,edi
        add     ebp,DWORD[72+r13]
        xor     ecx,esi
        mov     edi,edx
        xor     edi,esi
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        and     ebx,edi
        add     eax,DWORD[76+r13]
        xor     ebx,edx
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        xor     ebp,edx
        vpalignr        ymm5,ymm2,ymm1,8
        add     esi,DWORD[96+r13]
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        vpsrldq ymm8,ymm4,4
        xor     eax,ebx
        add     esi,r12d
        xor     eax,ecx
        vpxor   ymm5,ymm5,ymm1
        vpxor   ymm8,ymm8,ymm3
        add     edx,DWORD[100+r13]
        lea     edx,[rax*1+rdx]
        vpxor   ymm5,ymm5,ymm8
        rorx    r12d,esi,27
        rorx    eax,esi,2
        xor     esi,ebp
        add     edx,r12d
        vpsrld  ymm8,ymm5,31
        vmovdqu ymm11,YMMWORD[((-32))+r14]
        xor     esi,ebx
        add     ecx,DWORD[104+r13]
        lea     ecx,[rsi*1+rcx]
        vpslldq ymm10,ymm5,12
        vpaddd  ymm5,ymm5,ymm5
        rorx    r12d,edx,27
        rorx    esi,edx,2
        vpsrld  ymm9,ymm10,30
        vpor    ymm5,ymm5,ymm8
        xor     edx,eax
        add     ecx,r12d
        vpslld  ymm10,ymm10,2
        vpxor   ymm5,ymm5,ymm9
        xor     edx,ebp
        add     ebx,DWORD[108+r13]
        lea     r13,[256+r13]
        vpxor   ymm5,ymm5,ymm10
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        vpaddd  ymm9,ymm5,ymm11
        xor     ecx,esi
        add     ebx,r12d
        xor     ecx,eax
        vmovdqu YMMWORD[160+rsp],ymm9
        add     ebp,DWORD[((-128))+r13]
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        xor     ebx,esi
        vpalignr        ymm6,ymm3,ymm2,8
        add     eax,DWORD[((-124))+r13]
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        vpsrldq ymm8,ymm5,4
        xor     ebp,ecx
        add     eax,r12d
        xor     ebp,edx
        vpxor   ymm6,ymm6,ymm2
        vpxor   ymm8,ymm8,ymm4
        add     esi,DWORD[((-120))+r13]
        lea     esi,[rbp*1+rsi]
        vpxor   ymm6,ymm6,ymm8
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        vpsrld  ymm8,ymm6,31
        xor     eax,ecx
        add     edx,DWORD[((-116))+r13]
        lea     edx,[rax*1+rdx]
        vpslldq ymm10,ymm6,12
        vpaddd  ymm6,ymm6,ymm6
        rorx    r12d,esi,27
        rorx    eax,esi,2
        vpsrld  ymm9,ymm10,30
        vpor    ymm6,ymm6,ymm8
        xor     esi,ebp
        add     edx,r12d
        vpslld  ymm10,ymm10,2
        vpxor   ymm6,ymm6,ymm9
        xor     esi,ebx
        add     ecx,DWORD[((-96))+r13]
        vpxor   ymm6,ymm6,ymm10
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        vpaddd  ymm9,ymm6,ymm11
        xor     edx,eax
        add     ecx,r12d
        xor     edx,ebp
        vmovdqu YMMWORD[192+rsp],ymm9
        add     ebx,DWORD[((-92))+r13]
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        xor     ecx,eax
        vpalignr        ymm7,ymm4,ymm3,8
        add     ebp,DWORD[((-88))+r13]
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        vpsrldq ymm8,ymm6,4
        xor     ebx,edx
        add     ebp,r12d
        xor     ebx,esi
        vpxor   ymm7,ymm7,ymm3
        vpxor   ymm8,ymm8,ymm5
        add     eax,DWORD[((-84))+r13]
        lea     eax,[rbx*1+rax]
        vpxor   ymm7,ymm7,ymm8
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        vpsrld  ymm8,ymm7,31
        xor     ebp,edx
        add     esi,DWORD[((-64))+r13]
        lea     esi,[rbp*1+rsi]
        vpslldq ymm10,ymm7,12
        vpaddd  ymm7,ymm7,ymm7
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        vpsrld  ymm9,ymm10,30
        vpor    ymm7,ymm7,ymm8
        xor     eax,ebx
        add     esi,r12d
        vpslld  ymm10,ymm10,2
        vpxor   ymm7,ymm7,ymm9
        xor     eax,ecx
        add     edx,DWORD[((-60))+r13]
        vpxor   ymm7,ymm7,ymm10
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        rorx    eax,esi,2
        vpaddd  ymm9,ymm7,ymm11
        xor     esi,ebp
        add     edx,r12d
        xor     esi,ebx
        vmovdqu YMMWORD[224+rsp],ymm9
        add     ecx,DWORD[((-56))+r13]
        lea     ecx,[rsi*1+rcx]
        rorx    r12d,edx,27
        rorx    esi,edx,2
        xor     edx,eax
        add     ecx,r12d
        xor     edx,ebp
        add     ebx,DWORD[((-52))+r13]
        lea     ebx,[rdx*1+rbx]
        rorx    r12d,ecx,27
        rorx    edx,ecx,2
        xor     ecx,esi
        add     ebx,r12d
        xor     ecx,eax
        add     ebp,DWORD[((-32))+r13]
        lea     ebp,[rbp*1+rcx]
        rorx    r12d,ebx,27
        rorx    ecx,ebx,2
        xor     ebx,edx
        add     ebp,r12d
        xor     ebx,esi
        add     eax,DWORD[((-28))+r13]
        lea     eax,[rbx*1+rax]
        rorx    r12d,ebp,27
        rorx    ebx,ebp,2
        xor     ebp,ecx
        add     eax,r12d
        xor     ebp,edx
        add     esi,DWORD[((-24))+r13]
        lea     esi,[rbp*1+rsi]
        rorx    r12d,eax,27
        rorx    ebp,eax,2
        xor     eax,ebx
        add     esi,r12d
        xor     eax,ecx
        add     edx,DWORD[((-20))+r13]
        lea     edx,[rax*1+rdx]
        rorx    r12d,esi,27
        add     edx,r12d
        lea     r13,[128+rsp]


        add     edx,DWORD[r8]
        add     esi,DWORD[4+r8]
        add     ebp,DWORD[8+r8]
        mov     DWORD[r8],edx
        add     ebx,DWORD[12+r8]
        mov     DWORD[4+r8],esi
        mov     eax,edx
        add     ecx,DWORD[16+r8]
        mov     r12d,ebp
        mov     DWORD[8+r8],ebp
        mov     edx,ebx

        mov     DWORD[12+r8],ebx
        mov     ebp,esi
        mov     DWORD[16+r8],ecx

        mov     esi,ecx
        mov     ecx,r12d


        cmp     r9,r10
        jbe     NEAR $L$oop_avx2

$L$done_avx2:
        vzeroupper
        movaps  xmm6,XMMWORD[((-40-96))+r11]
        movaps  xmm7,XMMWORD[((-40-80))+r11]
        movaps  xmm8,XMMWORD[((-40-64))+r11]
        movaps  xmm9,XMMWORD[((-40-48))+r11]
        movaps  xmm10,XMMWORD[((-40-32))+r11]
        movaps  xmm11,XMMWORD[((-40-16))+r11]
        mov     r14,QWORD[((-40))+r11]

        mov     r13,QWORD[((-32))+r11]

        mov     r12,QWORD[((-24))+r11]

        mov     rbp,QWORD[((-16))+r11]

        mov     rbx,QWORD[((-8))+r11]

        lea     rsp,[r11]

$L$epilogue_avx2:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret

$L$SEH_end_sha1_block_data_order_avx2:
section .rdata rdata align=64
ALIGN   64
; K_XX_XX: read-only constant table (placed in .rdata, 64-byte aligned).
; Layout: four 32-byte rows of SHA-1 round constants (each value repeated
; eight times so a whole YMM register can be loaded/added at once),
; followed by byte-shuffle control masks.
K_XX_XX:
        ; 0x5a827999 = SHA-1 round constant for rounds 0..19 (FIPS 180-4)
        DD      0x5a827999,0x5a827999,0x5a827999,0x5a827999
        DD      0x5a827999,0x5a827999,0x5a827999,0x5a827999
        ; 0x6ed9eba1 = SHA-1 round constant for rounds 20..39
        DD      0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
        DD      0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
        ; 0x8f1bbcdc = SHA-1 round constant for rounds 40..59
        DD      0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
        DD      0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
        ; 0xca62c1d6 = SHA-1 round constant for rounds 60..79
        DD      0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
        DD      0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
        ; Stored little-endian these dwords give bytes 03,02,01,00,07,06,05,04,...
        ; Used as a byte-shuffle control, that pattern reverses the byte order
        ; inside each 32-bit word (big-endian <-> little-endian load).
        ; NOTE(review): presumably the mask the SIMD paths load for vpshufb/pshufb;
        ; the base-register setup is outside this view -- confirm.
        DD      0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
        DD      0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
        ; Bytes 15..0 in descending order: as a shuffle control this reverses all
        ; 16 bytes of a 128-bit lane.
        ; NOTE(review): consumer is not visible here; presumably the SHA-extension
        ; path -- confirm.
DB      0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
section .text
; NUL-terminated ASCII identification string embedded in the code section.
; Decoded, the bytes below read:
;   "SHA1 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB      83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
DB      102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44
DB      32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60
DB      97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114
DB      103,62,0
ALIGN   64
EXTERN  __imp_RtlVirtualUnwind

ALIGN   16
;-----------------------------------------------------------------------
; se_handler
; Win64 language-specific exception handler referenced from
; $L$SEH_info_sha1_block_data_order in .xdata.
; ABI:  Microsoft x64 handler signature; only two arguments are read:
;         r8 = CONTEXT* of the interrupted frame
;         r9 = DISPATCHER_CONTEXT* (used in $L$common_seh_tail)
; Out:  eax = 1 (ExceptionContinueSearch), set in $L$common_seh_tail.
; Job:  work out which stack pointer value the interrupted code had on
;       entry, restore the callee-saved GPRs it had pushed into the
;       CONTEXT record, then let the shared tail finish the unwind.
; The numeric displacements off r8 are field offsets of the x64 CONTEXT
; structure; field names are given in the comments.
;-----------------------------------------------------------------------
se_handler:
        push    rsi                     ; preserve all non-volatile GPRs and flags;
        push    rdi                     ; the shared tail pops them in reverse order
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64                  ; 32-byte shadow space + 4 stack args for RtlVirtualUnwind

        mov     rax,QWORD[120+r8]       ; rax = context->Rax
        mov     rbx,QWORD[248+r8]       ; rbx = context->Rip

        lea     r10,[$L$prologue]
        cmp     rbx,r10                 ; Rip < $L$prologue: frame not set up yet,
        jb      NEAR $L$common_seh_tail ; context->Rax still holds the entry rsp

        mov     rax,QWORD[152+r8]       ; rax = context->Rsp

        lea     r10,[$L$epilogue]
        cmp     rbx,r10                 ; Rip >= $L$epilogue: GPRs already restored,
        jae     NEAR $L$common_seh_tail ; context->Rsp is the value to use

        mov     rax,QWORD[64+rax]       ; inside the body: reload the original rsp the
                                        ; function stored at [64+rsp] after aligning its frame

        mov     rbx,QWORD[((-8))+rax]   ; fetch the five registers the prologue pushed
        mov     rbp,QWORD[((-16))+rax]  ; just below the original rsp
        mov     r12,QWORD[((-24))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     QWORD[144+r8],rbx       ; context->Rbx
        mov     QWORD[160+r8],rbp       ; context->Rbp
        mov     QWORD[216+r8],r12       ; context->R12
        mov     QWORD[224+r8],r13       ; context->R13
        mov     QWORD[232+r8],r14       ; context->R14

        jmp     NEAR $L$common_seh_tail ; rax = entry rsp; tail restores Rsp/Rsi/Rdi and unwinds


ALIGN   16
;-----------------------------------------------------------------------
; shaext_handler
; Win64 language-specific exception handler referenced from
; $L$SEH_info_sha1_block_data_order_shaext in .xdata.
; ABI:  Microsoft x64 handler signature; only two arguments are read:
;         r8 = CONTEXT* of the interrupted frame
;         r9 = DISPATCHER_CONTEXT* (used in $L$common_seh_tail)
; Out:  eax = 1 (ExceptionContinueSearch), set in $L$common_seh_tail.
; Job:  when the fault lies between $L$prologue_shaext and
;       $L$epilogue_shaext, copy 64 bytes of saved XMM state from the
;       stack back into the CONTEXT record; no GPRs are restored here.
;-----------------------------------------------------------------------
shaext_handler:
        push    rsi                     ; preserve all non-volatile GPRs and flags;
        push    rdi                     ; the shared tail pops them in reverse order
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64                  ; 32-byte shadow space + 4 stack args for RtlVirtualUnwind

        mov     rax,QWORD[120+r8]       ; rax = context->Rax
        mov     rbx,QWORD[248+r8]       ; rbx = context->Rip

        lea     r10,[$L$prologue_shaext]
        cmp     rbx,r10                 ; Rip before the prologue label: nothing saved yet
        jb      NEAR $L$common_seh_tail

        lea     r10,[$L$epilogue_shaext]
        cmp     rbx,r10                 ; Rip at/after the epilogue label: already restored
        jae     NEAR $L$common_seh_tail

        ; NOTE(review): this relies on the shaext routine keeping its entry rsp in
        ; rax for its whole body and saving four XMM registers at [rax-72..rax-8);
        ; that routine is outside this view -- confirm.
        lea     rsi,[((-8-64))+rax]     ; source: 64-byte XMM save area below entry rsp
        lea     rdi,[512+r8]            ; destination: &context->Xmm6
        mov     ecx,8                   ; 8 qwords = 4 XMM registers
        DD      0xa548f3fc              ; raw encoding of: cld; rep movsq

        jmp     NEAR $L$common_seh_tail ; rax = entry rsp; tail restores Rsp/Rsi/Rdi and unwinds


ALIGN   16
;-----------------------------------------------------------------------
; ssse3_handler
; Win64 language-specific exception handler shared by the SSSE3, AVX and
; AVX2 unwind records in .xdata; each record supplies its own pair of
; prologue/epilogue label RVAs as handler data.
; ABI:  Microsoft x64 handler signature; only two arguments are read:
;         r8 = CONTEXT* of the interrupted frame
;         r9 = DISPATCHER_CONTEXT*
; Out:  eax = 1 (ExceptionContinueSearch).
; Job:  if the fault lies between the two labels, restore xmm6..xmm11
;       and rbx/rbp/r12/r13/r14 into the CONTEXT record from the save
;       area addressed through context->R11, then fall into
;       $L$common_seh_tail, which se_handler and shaext_handler also
;       jump to.
; Displacements off r8 are x64 CONTEXT field offsets; displacements off
; r9/rsi in the tail are DISPATCHER_CONTEXT field offsets.
;-----------------------------------------------------------------------
ssse3_handler:
        push    rsi                     ; preserve all non-volatile GPRs and flags;
        push    rdi                     ; popped in reverse order before returning
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64                  ; 32-byte shadow space + 4 stack args for RtlVirtualUnwind

        mov     rax,QWORD[120+r8]       ; rax = context->Rax
        mov     rbx,QWORD[248+r8]       ; rbx = context->Rip

        mov     rsi,QWORD[8+r9]         ; rsi = disp->ImageBase
        mov     r11,QWORD[56+r9]        ; r11 = disp->HandlerData (two RVAs from .xdata)

        mov     r10d,DWORD[r11]         ; HandlerData[0] = prologue label RVA
        lea     r10,[r10*1+rsi]         ; convert RVA to absolute address
        cmp     rbx,r10                 ; Rip < prologue label: frame not set up yet,
        jb      NEAR $L$common_seh_tail ; context->Rax still holds the entry rsp

        mov     rax,QWORD[208+r8]       ; rax = context->R11 (the SIMD routines keep the
                                        ; entry rsp in r11; see the epilogue's lea rsp,[r11])

        mov     r10d,DWORD[4+r11]       ; HandlerData[1] = epilogue label RVA
        lea     r10,[r10*1+rsi]         ; convert RVA to absolute address
        cmp     rbx,r10                 ; Rip >= epilogue label: registers already restored
        jae     NEAR $L$common_seh_tail

        lea     rsi,[((-40-96))+rax]    ; source: saved xmm6..xmm11 below the pushed GPRs
        lea     rdi,[512+r8]            ; destination: &context->Xmm6
        mov     ecx,12                  ; 12 qwords = 6 XMM registers
        DD      0xa548f3fc              ; raw encoding of: cld; rep movsq

        mov     rbx,QWORD[((-8))+rax]   ; fetch the five callee-saved GPRs stored just
        mov     rbp,QWORD[((-16))+rax]  ; below the entry rsp
        mov     r12,QWORD[((-24))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     QWORD[144+r8],rbx       ; context->Rbx
        mov     QWORD[160+r8],rbp       ; context->Rbp
        mov     QWORD[216+r8],r12       ; context->R12
        mov     QWORD[224+r8],r13       ; context->R13
        mov     QWORD[232+r8],r14       ; context->R14

; Shared tail for all three handlers.
; In: rax = stack pointer value at entry to the interrupted function,
;     r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*.
$L$common_seh_tail:
        mov     rdi,QWORD[8+rax]        ; rdi/rsi as saved by the WIN64 prologue in the
        mov     rsi,QWORD[16+rax]       ; caller-provided home slots at [8+rsp]/[16+rsp]
        mov     QWORD[152+r8],rax       ; context->Rsp
        mov     QWORD[168+r8],rsi       ; context->Rsi
        mov     QWORD[176+r8],rdi       ; context->Rdi

        mov     rdi,QWORD[40+r9]        ; destination: disp->ContextRecord
        mov     rsi,r8                  ; source: the context we just patched
        mov     ecx,154                 ; 154 qwords = 1232 bytes = sizeof(CONTEXT)
        DD      0xa548f3fc              ; raw encoding of: cld; rep movsq

        mov     rsi,r9                  ; rsi = disp (r9 is about to become arg4)
        xor     rcx,rcx                 ; arg1: HandlerType = 0 (UNW_FLAG_NHANDLER)
        mov     rdx,QWORD[8+rsi]        ; arg2: disp->ImageBase
        mov     r8,QWORD[rsi]           ; arg3: disp->ControlPc
        mov     r9,QWORD[16+rsi]        ; arg4: disp->FunctionEntry
        mov     r10,QWORD[40+rsi]       ; disp->ContextRecord
        lea     r11,[56+rsi]            ; &disp->HandlerData
        lea     r12,[24+rsi]            ; &disp->EstablisherFrame
        mov     QWORD[32+rsp],r10       ; arg5: ContextRecord
        mov     QWORD[40+rsp],r11       ; arg6: HandlerData out-pointer
        mov     QWORD[48+rsp],r12       ; arg7: EstablisherFrame out-pointer
        mov     QWORD[56+rsp],rcx       ; arg8: ContextPointers = NULL
        call    QWORD[__imp_RtlVirtualUnwind]

        mov     eax,1                   ; return ExceptionContinueSearch
        add     rsp,64                  ; drop shadow space / stack args
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h               ;repret


; .pdata: Win64 function table. Each group of three image-relative
; DWORDs is one RUNTIME_FUNCTION entry:
;   BeginAddress, EndAddress, UnwindInfoAddress (a record in .xdata).
; There is one entry per code path of sha1_block_data_order.
section .pdata rdata align=4
ALIGN   4
        ; integer-only path -> se_handler
        DD      $L$SEH_begin_sha1_block_data_order wrt ..imagebase
        DD      $L$SEH_end_sha1_block_data_order wrt ..imagebase
        DD      $L$SEH_info_sha1_block_data_order wrt ..imagebase
        ; SHA-extension path -> shaext_handler
        DD      $L$SEH_begin_sha1_block_data_order_shaext wrt ..imagebase
        DD      $L$SEH_end_sha1_block_data_order_shaext wrt ..imagebase
        DD      $L$SEH_info_sha1_block_data_order_shaext wrt ..imagebase
        ; SSSE3 path -> ssse3_handler
        DD      $L$SEH_begin_sha1_block_data_order_ssse3 wrt ..imagebase
        DD      $L$SEH_end_sha1_block_data_order_ssse3 wrt ..imagebase
        DD      $L$SEH_info_sha1_block_data_order_ssse3 wrt ..imagebase
        ; AVX path -> ssse3_handler
        DD      $L$SEH_begin_sha1_block_data_order_avx wrt ..imagebase
        DD      $L$SEH_end_sha1_block_data_order_avx wrt ..imagebase
        DD      $L$SEH_info_sha1_block_data_order_avx wrt ..imagebase
        ; AVX2 path -> ssse3_handler
        DD      $L$SEH_begin_sha1_block_data_order_avx2 wrt ..imagebase
        DD      $L$SEH_end_sha1_block_data_order_avx2 wrt ..imagebase
        DD      $L$SEH_info_sha1_block_data_order_avx2 wrt ..imagebase
; .xdata: Win64 UNWIND_INFO records referenced from .pdata.
; Every record starts with the four header bytes 9,0,0,0:
;   byte 0 = 0x09 -> Version 1 (low 3 bits), Flags 1 = UNW_FLAG_EHANDLER
;   byte 1 = SizeOfProlog 0, byte 2 = CountOfCodes 0, byte 3 = no frame register
; With zero unwind codes, all unwinding is done by the handler whose
; image-relative address follows the header. Any further DWORDs are
; handler-specific data, which the dispatcher exposes as
; DISPATCHER_CONTEXT.HandlerData.
section .xdata rdata align=8
ALIGN   8
$L$SEH_info_sha1_block_data_order:
DB      9,0,0,0
        DD      se_handler wrt ..imagebase
$L$SEH_info_sha1_block_data_order_shaext:
DB      9,0,0,0
        DD      shaext_handler wrt ..imagebase
$L$SEH_info_sha1_block_data_order_ssse3:
DB      9,0,0,0
        DD      ssse3_handler wrt ..imagebase
        ; HandlerData[0], HandlerData[1]: range in which ssse3_handler restores registers
        DD      $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase
$L$SEH_info_sha1_block_data_order_avx:
DB      9,0,0,0
        DD      ssse3_handler wrt ..imagebase
        ; HandlerData[0], HandlerData[1]: same handler, AVX path's label pair
        DD      $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
$L$SEH_info_sha1_block_data_order_avx2:
DB      9,0,0,0
        DD      ssse3_handler wrt ..imagebase
        ; HandlerData[0], HandlerData[1]: same handler, AVX2 path's label pair
        DD      $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase
